specification
def transform_image(image_path):
    """Load a captcha image from disk and turn it into a model-ready tensor.

    The image is read as single-channel grayscale, converted to a float
    tensor with ``T.ToTensor()``, moved to CUDA when available (CPU
    otherwise), and given an extra dimension at index 1.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``torch.Tensor`` on the selected device. For a grayscale input,
        ``ToTensor`` produces ``(1, H, W)``, so the result is
        ``(1, 1, H, W)``.

    Raises:
        OSError: If OpenCV cannot read or decode ``image_path``.
    """
    transform = T.Compose([T.ToTensor()])  # T --> torchvision.transforms object
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if pixels is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside PIL.
        raise OSError(f"could not read image: {image_path!r}")

    tensor = transform(Image.fromarray(pixels)).to(device)
    # Insert the extra axis at position 1, exactly as the model input expects.
    return tensor.unsqueeze(1)
def get_label(model_prediction, max_captcha_len=6):  # currently trained on max_len = 6
    """Decode a model prediction into the captcha text.

    The prediction is treated as ``max_captcha_len`` consecutive score
    segments, each ``len(_cls)`` values long; the highest-scoring class in
    each segment selects one character.

    Args:
        model_prediction: Tensor-like object supporting
            ``.squeeze().cpu().tolist()``. Assumed to hold a single
            prediction that flattens to at least
            ``max_captcha_len * len(_cls)`` scores -- TODO confirm against
            the model's output shape.
        max_captcha_len: Number of character positions to decode.

    Returns:
        A string of ``max_captcha_len`` characters drawn from ``_cls``.
    """
    # Class alphabet: a-z, A-Z, 0-9 and '$' (presumably a padding symbol
    # for captchas shorter than max_captcha_len -- verify against training).
    _cls = string.ascii_lowercase + string.ascii_uppercase + '0123456789' + '$'
    # Width of one per-position segment; defined here so the function does
    # not rely on a global that may not exist.
    _cls_dim = len(_cls)

    # Convert once instead of on every loop iteration.
    scores = model_prediction.squeeze().cpu().tolist()

    chars = []
    for idx in range(max_captcha_len):
        segment = scores[_cls_dim * idx:_cls_dim * (idx + 1)]
        chars.append(_cls[np.argmax(segment)])
    return ''.join(chars)
Model architecture: ResNet
Accuracy: 99.67%
# Example: run the traced model on one captcha image and print the decoded text.
# Load the model onto the same device transform_image selects for its output,
# so the model weights and the input tensor do not end up on different devices.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
predictor = torch.jit.load('captcha_model_v1.0_traced.pt', map_location=device)
input_path = 'captcha-cnn/test_captcha/00oLkY.jpg'
# Named input_tensor rather than `input` to avoid shadowing the builtin.
input_tensor = transform_image(input_path)
with torch.no_grad():  # inference only, so gradient tracking is disabled
    output = predictor(input_tensor)
o_label = get_label(output)
print(o_label)
Added more models, as well as a Hugging Face Space to try them out.
To build a more generalized captcha predictor, one can try fine-tuning the PARSeq model.
