"""Urdu text recognition inference wrapper: loads the recognition model and decodes single line images."""
import math
import os
import tempfile

import numpy as np
import torch
from PIL import Image

from .config import Config
from .dataset import NormalizePAD
from .model import Text_recognization_model
# AttnLabelConverter was referenced in __init__ but never imported (NameError
# in the non-CTC branch); it lives alongside CTCLabelConverter in .utils.
from .utils import AttnLabelConverter, Averager, CTCLabelConverter
class TextRecognition:
    """Load the Urdu text-recognition model once and run inference on line images."""

    def __init__(self):
        """Build the charset and label converter, construct the model, and load weights.

        Reads ``UrduGlyphs.txt`` (next to this file) for the character set and
        ``../recognization_model.pth`` for the pretrained weights.
        """
        self.opt = Config()

        # Character set: one glyph per line in UrduGlyphs.txt, concatenated,
        # plus a trailing space which acts as an extra class.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        glyphs_path = os.path.join(current_dir, "UrduGlyphs.txt")
        with open(glyphs_path, "r", encoding="utf-8") as file:
            self.opt.character = ''.join(line.strip('\n') for line in file) + " "

        # Pick the label converter matching the prediction head.
        if 'CTC' in self.opt.Prediction:
            self.converter = CTCLabelConverter(self.opt.character)
        else:
            self.converter = AttnLabelConverter(self.opt.character)
        self.opt.num_class = len(self.converter.character)

        # Load pretrained weights onto the configured device; eval mode for inference.
        model_path = os.path.join(current_dir, "../recognization_model.pth")
        self.model = Text_recognization_model(self.opt)
        self.model.load_state_dict(
            torch.load(model_path, map_location=self.opt.device, weights_only=True)
        )
        self.model = self.model.to(self.opt.device)
        self.model.eval()

    @staticmethod
    def _to_grayscale(image):
        """Convert the accepted input forms into a grayscale PIL image.

        Args:
            image: a file path (str), an HxWx3 RGB NumPy array, or an HxW
                grayscale NumPy array.

        Raises:
            ValueError: NumPy array is neither HxW nor HxWx3.
            TypeError: input is neither a str path nor a NumPy array.
        """
        if isinstance(image, str):
            return Image.open(image).convert('L')
        if isinstance(image, np.ndarray):
            if image.ndim == 3 and image.shape[2] == 3:
                # ITU-R 601 luma weights for RGB -> grayscale.
                gray = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140])
                return Image.fromarray(gray.astype('uint8'))
            if image.ndim == 2:
                return Image.fromarray(image.astype('uint8'))
            raise ValueError("Unsupported image format!")
        raise TypeError("Input must be a file path (str) or a NumPy array.")

    def recognize_image(self, image):
        """Recognize the text in a single line image.

        Args:
            image: file path or NumPy array (see ``_to_grayscale``).

        Returns:
            The decoded text string.
        """
        pil_image = self._to_grayscale(image)

        # Mirror horizontally before feeding the model.
        # NOTE(review): presumably the model was trained on flipped (RTL Urdu)
        # lines — confirm against the training pipeline.
        pil_image = pil_image.transpose(Image.Transpose.FLIP_LEFT_RIGHT)

        # Resize to the fixed model height, preserving aspect ratio but
        # clamping the width to imgW.
        w, h = pil_image.size
        ratio = w / float(h)
        resized_w = min(self.opt.imgW, math.ceil(self.opt.imgH * ratio))
        pil_image = pil_image.resize((resized_w, self.opt.imgH), Image.Resampling.BICUBIC)

        # Normalize and pad to (1, imgH, imgW), add a batch dimension, move to device.
        transform = NormalizePAD((1, self.opt.imgH, self.opt.imgW))
        img = transform(pil_image).unsqueeze(0).to(self.opt.device)

        # Greedy decode: argmax over the class dimension, then let the
        # converter collapse the index sequence into a string.
        preds = self.model(img)
        preds_size = torch.IntTensor([preds.size(1)])
        _, preds_index = preds.max(2)
        return self.converter.decode(preds_index.data, preds_size.data)[0]