"""Urdu text recognition inference wrapper: loads the recognition model and decodes single line images."""
import math
import os
import tempfile

import numpy as np
import torch
from PIL import Image

from .config import Config
from .dataset import NormalizePAD
from .model import Text_recognization_model
# AttnLabelConverter was referenced in __init__ but never imported (NameError
# in the non-CTC branch); it lives alongside CTCLabelConverter in .utils.
from .utils import AttnLabelConverter, Averager, CTCLabelConverter
class TextRecognition:
    """Load the Urdu text-recognition model once and run inference on line images."""

    def __init__(self):
        """Build the charset and label converter, construct the model, and load weights.

        Reads ``UrduGlyphs.txt`` (next to this file) for the character set and
        ``../recognization_model.pth`` for the pretrained weights.
        """
        self.opt = Config()

        # Character set: one glyph per line in UrduGlyphs.txt, concatenated,
        # plus a trailing space which acts as an extra class.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        glyphs_path = os.path.join(current_dir, "UrduGlyphs.txt")
        with open(glyphs_path, "r", encoding="utf-8") as file:
            self.opt.character = ''.join(line.strip('\n') for line in file) + " "

        # Pick the label converter matching the prediction head.
        if 'CTC' in self.opt.Prediction:
            self.converter = CTCLabelConverter(self.opt.character)
        else:
            self.converter = AttnLabelConverter(self.opt.character)
        self.opt.num_class = len(self.converter.character)

        # Load pretrained weights onto the configured device; eval mode for inference.
        model_path = os.path.join(current_dir, "../recognization_model.pth")
        self.model = Text_recognization_model(self.opt)
        self.model.load_state_dict(
            torch.load(model_path, map_location=self.opt.device, weights_only=True)
        )
        self.model = self.model.to(self.opt.device)
        self.model.eval()

    @staticmethod
    def _to_grayscale(image):
        """Convert the accepted input forms into a grayscale PIL image.

        Args:
            image: a file path (str), an HxWx3 RGB NumPy array, or an HxW
                grayscale NumPy array.

        Raises:
            ValueError: NumPy array is neither HxW nor HxWx3.
            TypeError: input is neither a str path nor a NumPy array.
        """
        if isinstance(image, str):
            return Image.open(image).convert('L')
        if isinstance(image, np.ndarray):
            if image.ndim == 3 and image.shape[2] == 3:
                # ITU-R 601 luma weights for RGB -> grayscale.
                gray = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140])
                return Image.fromarray(gray.astype('uint8'))
            if image.ndim == 2:
                return Image.fromarray(image.astype('uint8'))
            raise ValueError("Unsupported image format!")
        raise TypeError("Input must be a file path (str) or a NumPy array.")

    def recognize_image(self, image):
        """Recognize the text in a single line image.

        Args:
            image: file path or NumPy array (see ``_to_grayscale``).

        Returns:
            The decoded text string.
        """
        pil_image = self._to_grayscale(image)

        # Mirror horizontally before feeding the model.
        # NOTE(review): presumably the model was trained on flipped (RTL Urdu)
        # lines — confirm against the training pipeline.
        pil_image = pil_image.transpose(Image.Transpose.FLIP_LEFT_RIGHT)

        # Resize to the fixed model height, preserving aspect ratio but
        # clamping the width to imgW.
        w, h = pil_image.size
        ratio = w / float(h)
        resized_w = min(self.opt.imgW, math.ceil(self.opt.imgH * ratio))
        pil_image = pil_image.resize((resized_w, self.opt.imgH), Image.Resampling.BICUBIC)

        # Normalize and pad to (1, imgH, imgW), add a batch dimension, move to device.
        transform = NormalizePAD((1, self.opt.imgH, self.opt.imgW))
        img = transform(pil_image).unsqueeze(0).to(self.opt.device)

        # Greedy decode: argmax over the class dimension, then let the
        # converter collapse the index sequence into a string.
        preds = self.model(img)
        preds_size = torch.IntTensor([preds.size(1)])
        _, preds_index = preds.max(2)
        return self.converter.decode(preds_index.data, preds_size.data)[0]