MohammedHamdy32's picture
Add Egyptian ID information extraction
77f8d5f
from .config import Config
from .model import Text_recognization_model
import os
import torch
from .utils import CTCLabelConverter,Averager
from PIL import Image
import math
import numpy as np
from .dataset import NormalizePAD
import tempfile
import os
import math
import numpy as np
from PIL import Image
import torch
class TextRecognition:
    """Text recognition over a single cropped text-line image.

    Builds the character set from ``words.txt`` (shipped next to this
    module), constructs the label converter matching the model's
    prediction head, loads the trained weights, and exposes
    :meth:`recognize_image` for inference.
    """

    def __init__(self, model_path='model/recognization_model.pth', device='cpu'):
        """Initialize config, label converter and model, then load weights.

        Args:
            model_path: Weights file path, resolved relative to this
                module's directory.
            device: Torch device string, e.g. ``'cpu'`` or ``'cuda'``.
        """
        self.opt = Config()
        self.opt.device = device
        self.model_path = model_path

        # Build the character set from words.txt (one glyph per line);
        # a trailing space is appended as an extra class.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(current_dir, "words.txt")
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.readlines()
        self.opt.character = ''.join(line.strip('\n') for line in content) + " "

        # Pick the label converter matching the prediction head.
        if 'CTC' in self.opt.Prediction:
            self.converter = CTCLabelConverter(self.opt.character)
        else:
            # Lazy import: the module level only imports CTCLabelConverter,
            # so this branch previously raised NameError.
            # NOTE(review): assumed AttnLabelConverter lives in .utils next
            # to CTCLabelConverter — confirm.
            from .utils import AttnLabelConverter
            self.converter = AttnLabelConverter(self.opt.character)
        self.opt.num_class = len(self.converter.character)

        # Load trained weights (don't shadow the `model_path` parameter)
        # and switch to eval mode for inference.
        weights_path = os.path.join(current_dir, self.model_path)
        self.model = Text_recognization_model(self.opt)
        self.model.load_state_dict(
            torch.load(weights_path, map_location=self.opt.device, weights_only=True)
        )
        self.model = self.model.to(self.opt.device)
        self.model.eval()

    @staticmethod
    def _to_grayscale(image):
        """Convert a file path or NumPy array into a grayscale PIL image.

        Raises:
            ValueError: If a NumPy array has an unsupported shape.
            TypeError: If ``image`` is neither a path nor a NumPy array.
        """
        if isinstance(image, str):  # file path
            return Image.open(image).convert('L')
        if isinstance(image, np.ndarray):
            if image.ndim == 3 and image.shape[2] == 3:
                # ITU-R BT.601 luma weights for RGB -> grayscale.
                gray_array = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140])
                return Image.fromarray(gray_array.astype('uint8'))
            if image.ndim == 2:  # already grayscale
                return Image.fromarray(image.astype('uint8'))
            raise ValueError("Unsupported image format!")
        raise TypeError("Input must be a file path (str) or a NumPy array.")

    def recognize_image(self, image):
        """Run recognition on one image and return the decoded text.

        Args:
            image: File path (str) or NumPy array (HxW grayscale or
                HxWx3 RGB).

        Returns:
            The recognized text string.

        Raises:
            ValueError: If a NumPy array has an unsupported shape.
            TypeError: If ``image`` is neither a path nor a NumPy array.
        """
        pil_image = self._to_grayscale(image)

        # Mirror horizontally — presumably because the model was trained on
        # right-to-left script stored reversed; TODO confirm against training.
        pil_image = pil_image.transpose(Image.Transpose.FLIP_LEFT_RIGHT)

        # Resize to model height, preserving aspect ratio, capped at imgW.
        w, h = pil_image.size
        ratio = w / float(h)
        resized_w = min(self.opt.imgW, math.ceil(self.opt.imgH * ratio))
        pil_image = pil_image.resize((resized_w, self.opt.imgH), Image.Resampling.BICUBIC)

        # Normalize and pad to a fixed (1, imgH, imgW) tensor, add batch dim.
        transform = NormalizePAD((1, self.opt.imgH, self.opt.imgW))
        img = transform(pil_image).unsqueeze(0).to(self.opt.device)

        # Inference without gradient tracking (saves memory and time;
        # identical outputs).
        with torch.no_grad():
            preds = self.model(img)
        preds_size = torch.IntTensor([preds.size(1)])
        _, preds_index = preds.max(2)
        return self.converter.decode(preds_index.data, preds_size.data)[0]