# lanpip / test /ocr_test.py
# Parechan's picture
# Upload 35 files
# 0c84ee8 verified
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
sys.path.append('../src')
from ocr.normalization import word_normalization, letter_normalization
from ocr import page, words, characters
from ocr.helpers import implt, resize
from ocr.tfhelpers import Model
from ocr.datahelpers import idx2char
# Path of the page image to run the OCR on.
IMG = '../data/test.jpg'    # 1, 2, 3
# Language code; selects the character-classifier model directory below.
LANG = 'en'

# You can use only one of these two
# You HAVE TO train the CTC model by yourself using word_classifier_CTC.ipynb
MODEL_LOC_CHARS = f'../models/char-clas/{LANG}/CharClassifier'
MODEL_LOC_CTC = '../models/word-clas/CTC/Classifier1'

CHARACTER_MODEL = Model(MODEL_LOC_CHARS)
CTC_MODEL = Model(MODEL_LOC_CTC, 'word_prediction')

# cv2.imread does not raise on a missing or unreadable file, it returns None.
# Fail early with a readable message instead of an opaque OpenCV assertion
# inside cvtColor.
image = cv2.imread(IMG)
if image is None:
    raise OSError(f'Could not read image (missing or unreadable): {IMG}')
# OpenCV loads images in BGR channel order; convert to RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# implt(image)

# Crop image and get bounding boxes
crop = page.detection(image)
# implt(crop)
boxes = words.detection(crop)
# Group the detected word boxes into text lines (iterated line by line below).
lines = words.sort_words(boxes)
def recognise(img):
    """Return the text predicted by the CTC model for one word image.

    The image is passed through ``word_normalization`` (size argument 64,
    no border, tilt or hysteresis normalisation), wrapped into a batch of
    one and evaluated with ``CTC_MODEL``. Each predicted index is shifted
    by one, mapped to a character with ``idx2char`` and the characters are
    concatenated into the returned string.
    """
    normalized = word_normalization(
        img, 64, border=False, tilt=False, hyst_norm=False)
    width = normalized.shape[1]

    # Input has shape [batch_size, height, width, 1]
    batch = np.zeros((1, 64, width, 1), dtype=np.uint8)
    batch[0, :, :, 0] = normalized

    feed = {
        'inputs:0': batch,
        'inputs_length:0': [width],
        'keep_prob:0': 1,
    }
    # Only the first element of the evaluation result is used.
    prediction = CTC_MODEL.eval_feed(feed)[0]

    return ''.join(idx2char(index + 1) for index in prediction)
# implt(crop)
# Recognise every word box of each text line and print the line's words
# separated by single spaces.
for line in lines:
    line_text = " ".join(
        recognise(crop[top:bottom, left:right])
        for (left, top, right, bottom) in line
    )
    print(line_text)