import numpy as np
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor, to_pil_image

# Change the image bit depth: https://blog.csdn.net/weixin_39190382/article/details/105917690
img = Image.open('/tmp/wencai/tmp.png')
img = img.resize((192, 64))  # PIL resize takes (width, height)
img = img.convert("RGB")     # force three 8-bit channels
# print(img.getbands())
print(np.array(img).shape)   # (64, 192, 3)
img.save('/tmp/wencai/middle.png')

image = pil_to_tensor(img)  # uint8 tensor, shape (3, 64, 192)
to_pil_image(image).save('/tmp/wencai/2.png')


print(image.numpy())
# image = torch.cat((image, image, image), 0)
# print(image.shape)
image = image.float()  # dtype becomes float32, but values stay in [0, 255]
print(image.numpy())

to_pil_image(image).save('/tmp/wencai/output.png')

A PIL image has two relevant attributes: its size, (width, height), and its depth, reported by getbands().
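
Both can be checked directly; a minimal sketch (tmp.png stands in for any input image):

from PIL import Image

img = Image.open('/tmp/wencai/tmp.png')
print(img.size)        # (width, height), e.g. (192, 64)
print(img.mode)        # e.g. 'P' for a palette image, 'RGB' after convert("RGB")
print(img.getbands())  # e.g. ('P',) or ('R', 'G', 'B')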

The problem: after converting to a tensor, if image = image.float() is left out, the model raises an error:

RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same
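
The mismatch is easy to see by printing dtypes (assuming the img and model objects from the snippets in this post):

image = pil_to_tensor(img)
print(image.dtype)                     # torch.uint8 -> the "ByteTensor" in the error
print(next(model.parameters()).dtype)  # torch.float32 -> the "FloatTensor" weights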

According to online tutorials, image.float() should be used to convert the byte tensor into a float tensor. However, after the conversion, output.png and 2.png are completely different:

[screenshot comparing the two saved images]

The left is 2.png, the right is the image after float(); they are clearly not the same thing. The cause is that to_pil_image treats a float tensor as normalized to [0, 1] and multiplies it by 255 before casting back to uint8, so values still sitting in [0, 255] overflow the cast and come out as noise.
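
A minimal sketch of the overflow (file names are illustrative; the scaling is torchvision's handling of float inputs in to_pil_image):

import torch
from torchvision.transforms.functional import to_pil_image

t = torch.full((3, 64, 192), 200, dtype=torch.uint8)  # flat mid-gray image
f = t.float()                                         # still in [0, 255]
to_pil_image(t).save('/tmp/byte_ok.png')         # saved as-is: fine
to_pil_image(f).save('/tmp/float_bad.png')       # 200.0 * 255 = 51000 cast to uint8: garbage
to_pil_image(f / 255).save('/tmp/float_ok.png')  # rescaled to [0, 1]: fine again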

So I compared against randomly generated test cases: it seems all that is needed is to divide by 256 on top of the existing conversion.

[screenshots: the randomly generated test case and the now-correct preprocessed image]

And now it looks right! The final prediction, pred: t1jt, is still unsatisfying, though; the model needs more training.
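
Dividing by 256 works because it lands almost exactly on the normalization the model saw during training; the exact version is torchvision's to_tensor, which divides uint8 values by 255. A quick check (img as above):

import torch
from torchvision.transforms.functional import to_tensor, pil_to_tensor

a = to_tensor(img)                    # float32 in [0, 1]; divides uint8 by 255
b = pil_to_tensor(img).float() / 255  # manual equivalent
print(torch.allclose(a, b))           # True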

import os
import sys

import numpy as np
import torch
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor, to_pil_image

lowercase = True
char = Char(lowercase)  # Char is defined in the project's captcha_break code
width, height, n_len, n_classes = 192, 64, 4, char.length  # 192 64
n_input_length = 12

filepath = os.path.dirname(os.path.abspath(__file__))  # 1. source directory
sys.path.append(f'{filepath}/captcha_break/code/')
model = torch.load(f'{filepath}/captcha_break/model/ctc_lower_2021.pth')
model.eval()


def main():
    # Change the image bit depth: https://blog.csdn.net/weixin_39190382/article/details/105917690
    img = Image.open('/tmp/wencai/tmp.png')
    img = img.resize((192, 64))
    img = img.convert("RGB")
    # print(img.getbands())
    print(np.array(img).shape)
    img.save('/tmp/wencai/1.png')

    image = pil_to_tensor(img)
    to_pil_image(image).save('/tmp/wencai/2.png')

    print(image.numpy())
    # image = torch.cat((image, image, image), 0)
    # print(image.shape)
    image = image.float() / 256  # normalize to roughly [0, 1]; to_tensor divides by 255 instead
    print(image.numpy())
    to_pil_image(image).save('/tmp/wencai/3.png')

    output = model(image.unsqueeze(0).cuda())
    output_argmax = output.detach().permute(1, 0, 2).argmax(dim=-1)  # swap to (batch, time, classes), greedy argmax
    print('pred:', char.decode(output_argmax[0]))


if __name__ == '__main__':
    main()
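
For completeness, the manual resize/convert/float/divide steps could be folded into one torchvision pipeline. A sketch, not the original code; note that transforms.Resize takes (height, width), unlike PIL's (width, height):

from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize((64, 192)),  # (height, width) here
    transforms.ToTensor(),         # PIL image -> float32 tensor in [0, 1]
])

img = Image.open('/tmp/wencai/tmp.png').convert("RGB")
image = preprocess(img)
output = model(image.unsqueeze(0).cuda())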