File size: 3,519 Bytes
77f8d5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from .config import Config
from .model import Text_recognization_model
import os
import torch

from .utils import CTCLabelConverter,Averager

from PIL import Image
import math
import numpy as np
from .dataset import NormalizePAD
import tempfile


import os
import math
import numpy as np
from PIL import Image
import torch

class TextRecognition:
    """Recognise the text contained in a single image.

    The constructor builds the configuration, the character set, the label
    converter and the network once. :meth:`recognize_image` then pushes one
    image through the network and returns the decoded string.
    """

    def __init__(self, model_path='model/recognization_model.pth', device='cpu'):
        """Build the recognition model and load its weights.

        Args:
            model_path: Path of the weights file. It is joined onto the
                directory that contains this module, so a relative path is
                resolved next to the package files.
            device: Device identifier stored on the config and used both for
                ``torch.load(map_location=...)`` and for running the model.
        """
        # Initialize configuration
        self.opt = Config()
        self.opt.device = device
        self.model_path = model_path

        # Load the character set from words.txt located next to this module.
        # Line breaks are dropped and a trailing space is appended so that the
        # blank character is part of the alphabet.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(current_dir, "words.txt")
        with open(file_path, "r", encoding="utf-8") as file:
            self.opt.character = ''.join(line.strip('\n') for line in file) + " "

        # Initialize converter based on prediction type
        if 'CTC' in self.opt.Prediction:
            self.converter = CTCLabelConverter(self.opt.character)
        else:
            # AttnLabelConverter is not imported at module level, so this
            # branch used to fail with NameError. Import it lazily here.
            # NOTE(review): presumably it lives next to CTCLabelConverter in
            # .utils - confirm. Also confirm that recognize_image (which calls
            # the model with the image only) is valid for a non-CTC head.
            from .utils import AttnLabelConverter
            self.converter = AttnLabelConverter(self.opt.character)

        # Set the number of classes
        self.opt.num_class = len(self.converter.character)

        # Load the model
        weights_path = os.path.join(current_dir, self.model_path)
        self.model = Text_recognization_model(self.opt)
        self.model.load_state_dict(
            torch.load(weights_path, map_location=self.opt.device, weights_only=True)
        )
        self.model = self.model.to(self.opt.device)
        self.model.eval()  # Set model to evaluation mode

    def recognize_image(self, image):
        """Return the text predicted for ``image``.

        Args:
            image: Either a file path (``str``) or a NumPy array. Arrays may
                be 2-D (grayscale) or 3-D with 1, 3 or 4 channels in the last
                axis; for 4 channels the fourth one is ignored.

        Returns:
            The first string returned by ``self.converter.decode``.

        Raises:
            TypeError: If ``image`` is neither a ``str`` nor a NumPy array.
            ValueError: If the array is empty or has an unsupported shape.
        """
        pil_image = self._to_grayscale(image)
        img = self._to_model_input(pil_image)

        # Inference only: disabling autograd avoids building a graph and
        # keeps memory usage flat across calls.
        with torch.no_grad():
            preds = self.model(img)

        # Pick the highest-scoring class per step and let the converter turn
        # the index sequence into text.
        preds_size = torch.IntTensor([preds.size(1)])
        _, preds_index = preds.max(2)
        return self.converter.decode(preds_index, preds_size)[0]

    @staticmethod
    def _to_grayscale(image):
        """Convert a file path or NumPy array into a grayscale PIL image."""
        if isinstance(image, str):
            # convert() returns an independent copy, so the file handle can
            # be released as soon as the conversion is done.
            with Image.open(image) as source:
                return source.convert('L')

        if not isinstance(image, np.ndarray):
            raise TypeError("Input must be a file path (str) or a NumPy array.")

        if image.size == 0:
            # Fail early with a clear message instead of a division by zero
            # (or an opaque PIL error) further down the pipeline.
            raise ValueError("Input image is empty.")

        if image.ndim == 3 and image.shape[2] == 1:
            # (H, W, 1) is grayscale with an explicit channel axis.
            image = image[..., 0]

        if image.ndim == 3 and image.shape[2] in (3, 4):
            # Weighted sum of the first three channels; a fourth channel, if
            # present, is ignored.
            # NOTE(review): the weights assume R, G, B channel order - confirm
            # callers do not pass BGR data.
            gray = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140])
            return Image.fromarray(gray.astype('uint8'))

        if image.ndim == 2:  # Already grayscale
            return Image.fromarray(image.astype('uint8'))

        raise ValueError("Unsupported image format!")

    def _to_model_input(self, pil_image):
        """Flip, resize, normalise and batch a grayscale PIL image."""
        # The image is mirrored horizontally before being fed to the network.
        # NOTE(review): presumably because the target script is written
        # right-to-left - confirm against the training pipeline.
        pil_image = pil_image.transpose(Image.Transpose.FLIP_LEFT_RIGHT)

        # Scale to the configured height while keeping the aspect ratio, but
        # never exceed the configured maximum width.
        w, h = pil_image.size
        ratio = w / float(h)
        resized_w = min(self.opt.imgW, math.ceil(self.opt.imgH * ratio))
        pil_image = pil_image.resize((resized_w, self.opt.imgH), Image.Resampling.BICUBIC)

        # Normalize, pad to (1, imgH, imgW), add the batch dimension and move
        # the tensor to the configured device.
        transform = NormalizePAD((1, self.opt.imgH, self.opt.imgW))
        return transform(pil_image).unsqueeze(0).to(self.opt.device)