# Change the image bit depth: https://blog.csdn.net/weixin_39190382/article/details/105917690
import os
import sys

import numpy as np
import torch
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor, to_pil_image

img = Image.open('/tmp/wencai/tmp.png')
img = img.resize((192, 64))
img = img.convert("RGB")
# print(img.getbands())
print(np.array(img).shape)
img.save('/tmp/wencai/middle.png')
image = pil_to_tensor(img)  # uint8 tensor, values 0-255
to_pil_image(image).save('/tmp/wencai/2.png')
print(image.numpy())
# image = torch.cat((image, image, image), 0)
# print(image.shape)
image = image.float()
print(image.numpy())
to_pil_image(image).save('/tmp/wencai/output.png')
An image has two attributes: a size ((width, height)) and a depth (getbands).
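A quick way to check both, reusing the tmp.png opened above (the values in the comments are only what I would expect after the resize and convert, not verified output):

print(img.size)        # (width, height), e.g. (192, 64) after the resize
print(img.getbands())  # e.g. ('R', 'G', 'B') after convert("RGB"); a palette PNG would show ('P',)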
The problem: after converting to a tensor, if you don't add image = image.float(), you get this error:
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same
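The mismatch is easy to see by printing the dtypes (a minimal check, reusing img from the snippet above and the model that is loaded further down):

x = pil_to_tensor(img)
print(x.dtype)                         # torch.uint8, which becomes a cuda.ByteTensor after .cuda()
print(next(model.parameters()).dtype)  # torch.float32, i.e. the weights are float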
According to the tutorials online, you should call image.float() to turn the byte tensor into a float tensor. However, the output.png produced after that conversion is completely different from 2.png:
On the left is 2.png, on the right the image after float(). Clearly not the same thing?
Comparing against a randomly generated test case, it seems all that is needed is to additionally divide by 256. The reason: to_pil_image treats a float tensor as values in [0, 1], while pil_to_tensor produces byte values in 0-255, so the float tensor has to be scaled down before saving (and before feeding it to a model trained on normalized inputs).
And with that it looks right again! Although the final prediction is pred: t1jt,
which is not good enough; the model needs more training.
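For reference, torchvision also has a to_tensor helper that does the byte-to-float conversion and the scaling to [0, 1] in a single call (it divides by 255 rather than 256). This is not what the code in this post uses, just the usual shortcut:

from torchvision.transforms.functional import to_tensor

image = to_tensor(img)           # float32, values already in [0, 1]
print(image.dtype, image.max())  # torch.float32, at most 1.0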
lowercase = True
filepath = os.path.dirname(os.path.abspath(__file__))  # directory of this script
sys.path.append(f'{filepath}/captcha_break/code/')  # Char presumably comes from the captcha_break code on this path; its import is not shown in the original post
char = Char(lowercase)
width, height, n_len, n_classes = 192, 64, 4, char.length  # 192 64
n_input_length = 12
model = torch.load(f'{filepath}/captcha_break/model/ctc_lower_2021.pth')
model.eval()
def main():
    # Change the image bit depth: https://blog.csdn.net/weixin_39190382/article/details/105917690
    img = Image.open('/tmp/wencai/tmp.png')
    img = img.resize((192, 64))
    img = img.convert("RGB")
    # print(img.getbands())
    print(np.array(img).shape)
    img.save('/tmp/wencai/1.png')
    image = pil_to_tensor(img)  # uint8 tensor, values 0-255
    to_pil_image(image).save('/tmp/wencai/2.png')
    print(image.numpy())
    # image = torch.cat((image, image, image), 0)
    # print(image.shape)
    image = image.float() / 256  # scale roughly into [0, 1] so both to_pil_image and the model get normalized floats
    print(image.numpy())
    to_pil_image(image).save('/tmp/wencai/3.png')
    output = model(image.unsqueeze(0).cuda())
    output_argmax = output.detach().permute(1, 0, 2).argmax(dim=-1)
    print('pred:', char.decode(output_argmax[0]))
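The post only defines main(); to actually run the script, add the usual entry point:

if __name__ == '__main__':
    main()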