diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..972f837b5596cda19e88e9ba28d3997525e77027 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# Ignore model weights (*.pt, *.pth) and the models directory +*.pt +*.pth +models diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..665bbb8cc00f9cff130dee6180600f3fc75721a6 --- /dev/null +++ b/app.py @@ -0,0 +1,50 @@ +import gradio as gr +from code.detection.recognize_id.detect_and_recognize_id import Recognize_ID +from code.detection.detection import detection +from code.recognization.recognization import TextRecognition +import os + +# Prediction function: runs ID recognition, text detection, then per-word text recognition +def predict_image(image): + + # Recognize ID + rec_id = Recognize_ID() + id = rec_id.give_me_id_number(image) + + # Detection + det = detection() + detection_list = det.full_pipeline(image) + + result = '' + # Loop on all detected images and recognize them + recognizer = TextRecognition() + for line in detection_list[2:6]: + for word in line: + recognized_word = recognizer.recognize_image(word) + result = result + recognized_word + ' ' + result += '\n' + + # Add Id number + result = result + id + + return result + +# List of paths to your sample images +current_dir = os.path.dirname(os.path.abspath(__file__)) +sample_images = [ + os.path.join(current_dir , "samples/id_1.png" ) +] + +# Create the Gradio interface +interface = gr.Interface( + fn=predict_image, # Function to run + inputs="image", # Input type + outputs="text", # Output type + title="Recognization", + description="Upload an image", + examples=sample_images +) + +# Launch the app +interface.launch() + diff --git a/code/__init__.py b/code/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/code/__pycache__/__init__.cpython-310.pyc b/code/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..209d20f5d54c51608cfb741f36ecef2026601841 Binary files /dev/null and b/code/__pycache__/__init__.cpython-310.pyc differ diff --git a/code/detection/__pycache__/detection.cpython-310.pyc b/code/detection/__pycache__/detection.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfa83b2aedff3ecd8d0aa2324149475966f20423 Binary files /dev/null and b/code/detection/__pycache__/detection.cpython-310.pyc differ diff --git a/code/detection/detection.py b/code/detection/detection.py new file mode 100644 index 0000000000000000000000000000000000000000..f6cb924b369e536b91f2858d338cd7066ce756dd --- /dev/null +++ b/code/detection/detection.py @@ -0,0 +1,77 @@ +from ultralytics import YOLO +from glob import glob +import matplotlib.pyplot as plt +import cv2 +import os +from PIL import Image +from ultralytics.engine.results import Results +import numpy as np + + +class detection: + + def __init__(self,model_path='detection.pt'): + current_dir = os.path.dirname(os.path.abspath(__file__)) + model_path = os.path.join(current_dir , model_path ) + self.model = YOLO(model_path) + + def get_distance(self,res): + boxes = res[0].boxes.xywh.numpy() # Convert to numpy array + # Sort primarily by Y (vertical), then X (horizontal) using lexsort + sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1])) + sorted_boxes = boxes[sorted_indices] + return sorted_boxes[:, 1], sorted_indices # Return sorted Y values and indices + + def handle_the_boxes(self,res, img, y_threshold=30): + distance_sorted, sorted_indices = self.get_distance(res) + PB = res[0].boxes.xyxy.numpy()[sorted_indices] # Get boxes in sorted order + same_object = [] + current_line = [PB[0]] + + # Group boxes into lines using Y threshold + for i in range(1, len(PB)): + prev_y = current_line[-1][1] # Use ymin from XYXY format + current_y = PB[i][1] + if abs(current_y - prev_y) > y_threshold: + # Sort line left-to-right before adding + current_line = 
sorted(current_line, key=lambda x: x[0] , reverse=True) + same_object.append(current_line) + current_line = [PB[i]] + else: + current_line.append(PB[i]) + + # Add the last line and sort it + if current_line: + current_line = sorted(current_line, key=lambda x: x[0]) + same_object.append(current_line) + + # Extract word images in final order + return [ + [self.words_pixels(img, box) for box in line] + for line in same_object + ] + + # Keep words_pixels as original + def words_pixels(self,img, xyxy): + xmin, ymin, xmax, ymax = xyxy.tolist() + return img[int(ymin):int(ymax)+1, int(xmin):int(xmax)+1] + + def full_pipeline(self,image,show=False): + + if isinstance(image, str): # If the input is a file path + img = cv2.imread(image) + elif isinstance(image, np.ndarray): # If the input is a NumPy array + image = image + img = image + + res = self.model(image) + + if show: + res[0].show() + + + return self.handle_the_boxes(res , img) + + + + \ No newline at end of file diff --git a/code/detection/recognize_id/__pycache__/detect_and_recognize_id.cpython-310.pyc b/code/detection/recognize_id/__pycache__/detect_and_recognize_id.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22304a52210bfc76499801bcf7fff3eae4e3b5d5 Binary files /dev/null and b/code/detection/recognize_id/__pycache__/detect_and_recognize_id.cpython-310.pyc differ diff --git a/code/detection/recognize_id/data/id_1.png b/code/detection/recognize_id/data/id_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b4dc92b97fca873a8fd6934610c8e99cc5fcbf8e Binary files /dev/null and b/code/detection/recognize_id/data/id_1.png differ diff --git a/code/detection/recognize_id/detect_and_recognize_id.py b/code/detection/recognize_id/detect_and_recognize_id.py new file mode 100644 index 0000000000000000000000000000000000000000..c38ccde2824246b4248725ee372c6b805e531395 --- /dev/null +++ b/code/detection/recognize_id/detect_and_recognize_id.py @@ -0,0 +1,36 @@ +from 
ultralytics import YOLO +from ultralytics.engine.results import Results +import cv2 +import os +import numpy as np + +class Recognize_ID: + + def __init__(self,model_path='recognization_id.pt'): + + current_dir = os.path.dirname(os.path.abspath(__file__)) + model_path = os.path.join(current_dir , model_path ) + self.model = YOLO(model=model_path ) + + def give_me_id_number(self,image:str): + """ + image : path to the input image (resolved relative to this file) or a NumPy image array + returns : the detected national ID number as a string of digits, ordered left to right + """ + if isinstance(image, str): # If the input is a file path + current_dir = os.path.dirname(os.path.abspath(__file__)) + image_path = os.path.join(current_dir , image ) + img = cv2.imread(image_path) + elif isinstance(image, np.ndarray): # If the input is a NumPy array + img = image + + print(type(img)) + res = self.model(img) + boxes = res[0].boxes.xywh[::,0].tolist() + classes = res[0].boxes.cls.tolist() + boxes_labels =[(int(key) , int(value)) for key , value in zip(boxes, classes)] + boxes_labels.sort() + national_id = "".join([str(i[1]) for i in boxes_labels]) + + return national_id + \ No newline at end of file diff --git a/code/recognization/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/code/recognization/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..363fcab7ed6e9634e198cf5555ceb88932c9a245 --- /dev/null +++ b/code/recognization/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/code/recognization/__pycache__/augmentation.cpython-310.pyc b/code/recognization/__pycache__/augmentation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5af1414f3796a4eae38c8688cd02a550383c25c9 Binary files /dev/null and b/code/recognization/__pycache__/augmentation.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/config.cpython-310.pyc b/code/recognization/__pycache__/config.cpython-310.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..51a87ec3a0ccc3ac524aba7c6986b855a89bb7a1 Binary files /dev/null and b/code/recognization/__pycache__/config.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/custom_test.cpython-310.pyc b/code/recognization/__pycache__/custom_test.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e020acaec0da4e1d8abc9c4b45b8006485e25fd Binary files /dev/null and b/code/recognization/__pycache__/custom_test.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/dataset.cpython-310.pyc b/code/recognization/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e51df8ed6ffd49d2625e12cb960f45db08fb61f3 Binary files /dev/null and b/code/recognization/__pycache__/dataset.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/densenet.cpython-310.pyc b/code/recognization/__pycache__/densenet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b64ced255432ff308391e8278347b4ac756f1a13 Binary files /dev/null and b/code/recognization/__pycache__/densenet.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/dropout_layer.cpython-310.pyc b/code/recognization/__pycache__/dropout_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06de6d5dd2521a6ee62d0b1f62942faa3cb56151 Binary files /dev/null and b/code/recognization/__pycache__/dropout_layer.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/feature_extraction.cpython-310.pyc b/code/recognization/__pycache__/feature_extraction.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bff21b39a7cd312e5500faaf2fe613f8a422b047 Binary files /dev/null and b/code/recognization/__pycache__/feature_extraction.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/hrnet.cpython-310.pyc 
b/code/recognization/__pycache__/hrnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92d810caeae28a3aaf61228e63000553d30d2e60 Binary files /dev/null and b/code/recognization/__pycache__/hrnet.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/inception_unet.cpython-310.pyc b/code/recognization/__pycache__/inception_unet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4260c91d50a105719d1c1a5bc77ced6d0e4ec4ed Binary files /dev/null and b/code/recognization/__pycache__/inception_unet.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/model.cpython-310.pyc b/code/recognization/__pycache__/model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2948126c22ecc33a5b5ff8a8bbfece283a84439 Binary files /dev/null and b/code/recognization/__pycache__/model.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/my_test.cpython-310.pyc b/code/recognization/__pycache__/my_test.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5324d9948252e22e29951d83a38d622aef6eae9c Binary files /dev/null and b/code/recognization/__pycache__/my_test.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/prediction.cpython-310.pyc b/code/recognization/__pycache__/prediction.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94d0400a3600559692f49771537aaeb74df0dcd1 Binary files /dev/null and b/code/recognization/__pycache__/prediction.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/rcnn.cpython-310.pyc b/code/recognization/__pycache__/rcnn.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24329e2201dcd529e41d36acc4afe4fcbee0f1a7 Binary files /dev/null and b/code/recognization/__pycache__/rcnn.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/recognization.cpython-310.pyc 
b/code/recognization/__pycache__/recognization.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..911d8be4425b964b232625833150e3fe3b924169 Binary files /dev/null and b/code/recognization/__pycache__/recognization.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/resnet.cpython-310.pyc b/code/recognization/__pycache__/resnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bad720be92ac510c5a2cb834409f9398c869c5f7 Binary files /dev/null and b/code/recognization/__pycache__/resnet.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/resunet.cpython-310.pyc b/code/recognization/__pycache__/resunet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2308ff5dc0319659b2d978c8e1e19ffe98266b0b Binary files /dev/null and b/code/recognization/__pycache__/resunet.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/sequence_modeling.cpython-310.pyc b/code/recognization/__pycache__/sequence_modeling.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..622e1a604b6aabede55f16a0ab808c6a9be611d6 Binary files /dev/null and b/code/recognization/__pycache__/sequence_modeling.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/unet.cpython-310.pyc b/code/recognization/__pycache__/unet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45a2fa5dec862f00a7b5fa09052449927f62dee0 Binary files /dev/null and b/code/recognization/__pycache__/unet.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/unet_attn.cpython-310.pyc b/code/recognization/__pycache__/unet_attn.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..885d45ee8912671290f7655fcb6f2d22b890c33d Binary files /dev/null and b/code/recognization/__pycache__/unet_attn.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/unet_plus_plus.cpython-310.pyc 
b/code/recognization/__pycache__/unet_plus_plus.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc24f330fa39c125bac66063a5e30d801dbefa6b Binary files /dev/null and b/code/recognization/__pycache__/unet_plus_plus.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/utils.cpython-310.pyc b/code/recognization/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35ca1ebdccf26eb530b09f661968cf3394ebc890 Binary files /dev/null and b/code/recognization/__pycache__/utils.cpython-310.pyc differ diff --git a/code/recognization/__pycache__/vgg.cpython-310.pyc b/code/recognization/__pycache__/vgg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..746508d9b03175d5be81a9ac9c4b9cc135a67d7c Binary files /dev/null and b/code/recognization/__pycache__/vgg.cpython-310.pyc differ diff --git a/code/recognization/augmentation.py b/code/recognization/augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..2c75388bac3c3f27562527d5a1e6024aae19c755 --- /dev/null +++ b/code/recognization/augmentation.py @@ -0,0 +1,134 @@ +""" +Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 +Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora +GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition +Project Website: https://abdur75648.github.io/UTRNet/ +Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial +4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) +""" + +from functools import partial +import random as rnd +import imgaug.augmenters as iaa +import numpy as np +from PIL import ImageFilter, Image +from timm.data import auto_augment + +_OP_CACHE = {} + +def _get_op(key, factory): + try: + op = _OP_CACHE[key] + except KeyError: + op = factory() + _OP_CACHE[key] = op + return op + + 
+def _get_param(level, img, max_dim_factor, min_level=1): + max_level = max(min_level, max_dim_factor * max(img.size)) + return round(min(level, max_level)) + +def gaussian_blur(img, radius, **__): + radius = _get_param(radius, img, 0.02) + key = 'gaussian_blur_' + str(radius) + op = _get_op(key, lambda: ImageFilter.GaussianBlur(radius)) + return img.filter(op) + +def motion_blur(img, k, **__): + k = _get_param(k, img, 0.08, 3) | 1 # bin to odd values + key = 'motion_blur_' + str(k) + op = _get_op(key, lambda: iaa.MotionBlur(k)) + return Image.fromarray(op(image=np.asarray(img))) + +def gaussian_noise(img, scale, **_): + scale = _get_param(scale, img, 0.25) | 1 # bin to odd values + key = 'gaussian_noise_' + str(scale) + op = _get_op(key, lambda: iaa.AdditiveGaussianNoise(scale=scale)) + return Image.fromarray(op(image=np.asarray(img))) + +def poisson_noise(img, lam, **_): + lam = _get_param(lam, img, 0.2) | 1 # bin to odd values + key = 'poisson_noise_' + str(lam) + op = _get_op(key, lambda: iaa.AdditivePoissonNoise(lam)) + return Image.fromarray(op(image=np.asarray(img))) + +def salt_and_pepper_noise(image, prob=0.05): + if prob <= 0: + return image + arr = np.asarray(image) + original_dtype = arr.dtype + intensity_levels = 2 ** (arr[0, 0].nbytes * 8) + min_intensity = 0 + max_intensity = intensity_levels - 1 + random_image_arr = np.random.choice([min_intensity, 1, np.nan], p=[prob / 2, 1 - prob, prob / 2], size=arr.shape) + salt_and_peppered_arr = arr.astype(np.float) * random_image_arr + salt_and_peppered_arr = np.nan_to_num(salt_and_peppered_arr, nan=max_intensity).astype(original_dtype) + return Image.fromarray(salt_and_peppered_arr) + +def random_border_crop(image): + img_width,img_height = image.size + crop_left = int(img_width * rnd.uniform(0.0, 0.025)) + crop_top = int(img_height * rnd.uniform(0.0, 0.075)) + crop_right = int(img_width * rnd.uniform(0.975, 1.0)) + crop_bottom = int(img_height * rnd.uniform(0.925, 1.0)) + final_image = 
image.crop((crop_left, crop_top, crop_right, crop_bottom)) + return final_image + +def random_resize(image): + size = image.size + new_size = [rnd.randint(int(0.5*size[0]), int(1.5*size[0])), rnd.randint(int(0.5*size[1]), int(1.5*size[1]))] + reduce_factor = rnd.randint(1,4) + new_size = tuple([int(x/reduce_factor) for x in new_size]) + final_image = image.resize(new_size) + return final_image + +def _level_to_arg(level, _hparams, max): + level = max * level / auto_augment._LEVEL_DENOM + return level, + +_RAND_TRANSFORMS = [ + 'AutoContrast', + 'Equalize', + 'Invert', + # 'Rotate', + 'Posterize', + 'Solarize', + 'SolarizeAdd', + 'Color', + 'Contrast', + 'Brightness', + 'Sharpness', + 'ShearX', +] +#_RAND_TRANSFORMS.remove('SharpnessIncreasing') # remove, interferes with *blur ops +_RAND_TRANSFORMS.extend([ + 'GaussianBlur', + 'GaussianNoise', + 'PoissonNoise' +]) +auto_augment.LEVEL_TO_ARG.update({ + 'GaussianBlur': partial(_level_to_arg, max=4), + 'MotionBlur': partial(_level_to_arg, max=20), + 'GaussianNoise': partial(_level_to_arg, max=0.1 * 255), + 'PoissonNoise': partial(_level_to_arg, max=40) +}) +auto_augment.NAME_TO_OP.update({ + 'GaussianBlur': gaussian_blur, + 'MotionBlur': motion_blur, + 'GaussianNoise': gaussian_noise, + 'PoissonNoise': poisson_noise +}) + +def rand_augment_transform(magnitude=5, num_layers=3): + # These are tuned for magnitude=5, which means that effective magnitudes are half of these values. + hparams = { + 'img_mean':128, + # 'rotate_deg': 5, + 'shear_x_pct': 0.9, + 'shear_y_pct': 0.0, + } + ra_ops = auto_augment.rand_augment_ops(magnitude, hparams, transforms=_RAND_TRANSFORMS) + # Supply weights to disable replacement in random selection (i.e. avoid applying the same op twice) + choice_weights = [1. 
/ len(ra_ops) for _ in range(len(ra_ops))] + return auto_augment.RandAugment(ra_ops, num_layers, choice_weights) diff --git a/code/recognization/config.py b/code/recognization/config.py new file mode 100644 index 0000000000000000000000000000000000000000..6cdbbf624809a4dd07ab458554cf7691555ec32b --- /dev/null +++ b/code/recognization/config.py @@ -0,0 +1,30 @@ +class Config: + FeatureExtraction = 'HRNet' # or any other feature extraction method + SequenceModeling = 'DBiLSTM' # or any other sequential model + Prediction = 'CTC' # or 'Attn' + input_channel = 1 # e.g., RGB image has 3 channels + output_channel = 32 # Adjust based on your architecture + hidden_size = 256 # Adjust based on your architecture + num_class = 182 # Number of output classes + device = 'cpu' # or 'cuda' for GPU + batch_max_length = 8 # Maximum sequence length for prediction + # Adam optimizer + adam = False + lr = 0.1 + batch_size = 4 + beta1 = 0.9 + workers = 4 + num_epochs = 5 + rho = 0.95 + eps = 1e-8 + + imgH = 32 + imgW = 400 + train_data = 'result/train/' # path to train data + valid_data = 'result/validate/' # path to validation data + saved_model = 'model/' + + character ='' + rgb = False + grad_clip = 5 + diff --git a/code/recognization/custom_test.py b/code/recognization/custom_test.py new file mode 100644 index 0000000000000000000000000000000000000000..fa03e4aa5b8c79b293f90e27f43240dbaa483858 --- /dev/null +++ b/code/recognization/custom_test.py @@ -0,0 +1,235 @@ +""" +Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 +Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora +GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition +Project Website: https://abdur75648.github.io/UTRNet/ +Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial +4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) +""" + +import os,shutil +import time 
+import argparse +import random +import numpy as np +import matplotlib.pyplot as plt +from datetime import datetime +import pytz + +import torch +import torch.utils.data +import torch.nn.functional as F +from tqdm import tqdm +from nltk.metrics.distance import edit_distance + +from utils import CTCLabelConverter, AttnLabelConverter, Averager, Logger +from dataset import hierarchical_dataset, AlignCollate +from model import Model + +def validation(model, criterion, evaluation_loader, converter, opt, device): + """ validation or evaluation """ + eval_arr = [] + sum_len_gt = 0 + + n_correct = 0 + + norm_ED = 0 + length_of_data = 0 + infer_time = 0 + valid_loss_avg = Averager() + + for i, (image_tensors, labels) in enumerate(tqdm(evaluation_loader)): + batch_size = image_tensors.size(0) + length_of_data = length_of_data + batch_size + image = image_tensors.to(device) + # For max length prediction + length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device) + text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device) + + text_for_loss, length_for_loss = converter.encode(labels, batch_max_length=opt.batch_max_length) + + start_time = time.time() + if 'CTC' in opt.Prediction: + preds = model(image) + forward_time = time.time() - start_time + preds_size = torch.IntTensor([preds.size(1)] * batch_size) + cost = criterion(preds.log_softmax(2).permute(1, 0, 2), text_for_loss, preds_size, length_for_loss) + _, preds_index = preds.max(2) + preds_str = converter.decode(preds_index.data, preds_size.data) + else: + preds = model(image, text=text_for_pred, is_train=False) + forward_time = time.time() - start_time + + preds = preds[:, :text_for_loss.shape[1] - 1, :].to(device) + target = text_for_loss[:, 1:].to(device) # without [GO] Symbol + cost = criterion(preds.contiguous().view(-1, preds.shape[-1]), target.contiguous().view(-1)) + _, preds_index = preds.max(2) + preds_str = converter.decode(preds_index, length_for_pred) + 
labels = converter.decode(text_for_loss[:, 1:], length_for_loss) + + infer_time += forward_time + valid_loss_avg.add(cost) + + # calculate accuracy & confidence score + preds_prob = F.softmax(preds, dim=2) + preds_max_prob, _ = preds_prob.max(dim=2) + confidence_score_list = [] + for gt, pred, pred_max_prob in zip(labels, preds_str, preds_max_prob): + if 'Attn' in opt.Prediction: + gt = gt[:gt.find('[s]')] + pred_EOS = pred.find('[s]') + pred = pred[:pred_EOS] # prune after "end of sentence" token ([s]) + pred_max_prob = pred_max_prob[:pred_EOS] + + if pred == gt: + n_correct += 1 + + # ICDAR2019 Normalized Edit Distance + if len(gt) == 0 or len(pred) == 0: + ED = 0 + elif len(gt) > len(pred): + ED = 1 - edit_distance(pred, gt) / len(gt) + else: + ED = 1 - edit_distance(pred, gt) / len(pred) + + eval_arr.append([gt,pred,ED]) + + sum_len_gt += len(gt) + norm_ED += (ED*len(gt)) + + # calculate confidence score (= multiply of pred_max_prob) + try: + confidence_score = pred_max_prob.cumprod(dim=0)[-1] + except: + confidence_score = 0 # for empty pred case, when prune after "end of sentence" token ([s]) + confidence_score_list.append(confidence_score) + # print(pred, gt, pred==gt, confidence_score) + + accuracy = n_correct / float(length_of_data) * 100 + norm_ED = norm_ED / float(sum_len_gt) + + return valid_loss_avg.val(), accuracy, norm_ED, eval_arr + + +def test(opt, device): + opt.device = device + os.makedirs("test_outputs", exist_ok=True) + datetime_now = str(datetime.now(pytz.timezone('Asia/Kolkata')).strftime("%Y-%m-%d_%H-%M-%S")) + logger = Logger(f'test_outputs/{datetime_now}.txt') + """ model configuration """ + if 'CTC' in opt.Prediction: + converter = CTCLabelConverter(opt.character) + else: + converter = AttnLabelConverter(opt.character) + opt.num_class = len(converter.character) + + if opt.rgb: + opt.input_channel = 3 + model = Model(opt) + logger.log('model input parameters', opt.imgH, opt.imgW, opt.input_channel, opt.output_channel, + opt.hidden_size, 
opt.num_class, opt.batch_max_length, opt.FeatureExtraction, + opt.SequenceModeling, opt.Prediction) + model = model.to(device) + + # load model + model.load_state_dict(torch.load(opt.saved_model, map_location=device)) + logger.log('Loaded pretrained model from %s' % opt.saved_model) + # logger.log(model) + + """ setup loss """ + if 'CTC' in opt.Prediction: + criterion = torch.nn.CTCLoss(zero_infinity=True).to(device) + else: + criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device) # ignore [GO] token = ignore index 0 + + """ evaluation """ + model.eval() + with torch.no_grad(): + AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)#, keep_ratio_with_pad=opt.PAD) + eval_data, eval_data_log = hierarchical_dataset(root=opt.eval_data, opt=opt, rand_aug=False) + logger.log(eval_data_log) + evaluation_loader = torch.utils.data.DataLoader( + eval_data, batch_size=opt.batch_size, + shuffle=False, + num_workers=int(opt.workers), + collate_fn=AlignCollate_evaluation, pin_memory=True) + _, accuracy, norm_ED, eval_arr = validation( model, criterion, evaluation_loader, converter, opt,device) + logger.log("="*20) + logger.log(f'Accuracy : {accuracy:0.4f}\n') + logger.log(f'Norm_ED : {norm_ED:0.4f}\n') + logger.log("="*20) + + if opt.visualize: + logger.log("Threshold - ", opt.threshold) + logger.log("ED","\t","gt","\t","pred") + arr = [] + for gt,pred,ED in eval_arr: + ED = ED*100.0 + arr.append(ED) + if ED<=(opt.threshold): + logger.log(ED,"\t",gt,"\t",pred) + plt.hist(arr, edgecolor="red") + plt.savefig('test_outputs/'+str(datetime_now)+".png") + plt.close() + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--visualize', action='store_true', help='for visualization of bad samples') + parser.add_argument('--threshold', type=float, help='Save samples below this threshold in txt file', default=50.0) + parser.add_argument('--eval_data', required=True, help='path to evaluation dataset') + 
parser.add_argument('--workers', type=int, help='number of data loading workers', default=4) + parser.add_argument('--batch_size', type=int, default=32, help='input batch size') + parser.add_argument('--saved_model', required=True, help="path to saved_model to evaluation") + """ Data processing """ + parser.add_argument('--batch_max_length', type=int, default=100, help='maximum-label-length') + parser.add_argument('--imgH', type=int, default=32, help='the height of the input image') + parser.add_argument('--imgW', type=int, default=400, help='the width of the input image') + parser.add_argument('--rgb', action='store_true', help='use rgb input') + """ Model Architecture """ + parser.add_argument('--FeatureExtraction', type=str, default="HRNet", #required=True, + help='FeatureExtraction stage VGG|RCNN|ResNet|UNet|HRNet|Densenet|InceptionUnet|ResUnet|AttnUNet|UNet|VGG') + parser.add_argument('--SequenceModeling', type=str, default="DBiLSTM", #required=True, + help='SequenceModeling stage LSTM|GRU|MDLSTM|BiLSTM|DBiLSTM') + parser.add_argument('--Prediction', type=str, default="CTC", #required=True, + help='Prediction stage CTC|Attn') + parser.add_argument('--input_channel', type=int, default=1, help='the number of input channel of Feature extractor') + parser.add_argument('--output_channel', type=int, default=512, help='the number of output channel of Feature extractor') + parser.add_argument('--hidden_size', type=int, default=256, help='the size of the LSTM hidden state') + """ GPU Selection """ + parser.add_argument('--device_id', type=str, default=None, help='cuda device ID') + + opt = parser.parse_args() + if opt.FeatureExtraction == "HRNet": + opt.output_channel = 32 + + # Fix random seeds for both numpy and pytorch + seed = 1111 + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + """ vocab / character number configuration """ + 
file = open("UrduGlyphs.txt","r",encoding="utf-8") + content = file.readlines() + content = ''.join([str(elem).strip('\n') for elem in content]) + opt.character = content+" " + + cuda_str = 'cuda' + if opt.device_id is not None: + cuda_str = f'cuda:{opt.device_id}' + device = torch.device(cuda_str if torch.cuda.is_available() else 'cpu') + print("Device : ", device) + + # opt.eval_data = "/DATA/parseq/val/" + # test(opt, device) + + # opt.eval_data = "/DATA/parseq/IIITH/lmdb_new/" + # test(opt, device) + + # opt.eval_data = "/DATA/public_datasets/UPTI/valid/" + # test(opt, device) + + test(opt, device) \ No newline at end of file diff --git a/code/recognization/data/1.png b/code/recognization/data/1.png new file mode 100644 index 0000000000000000000000000000000000000000..f94ddbc7af79bd6d47d29193dd9bb762446ece59 Binary files /dev/null and b/code/recognization/data/1.png differ diff --git a/code/recognization/data/10.png b/code/recognization/data/10.png new file mode 100644 index 0000000000000000000000000000000000000000..a650218fbf7cdf2f519e6cdffe93abe7fc4c6367 Binary files /dev/null and b/code/recognization/data/10.png differ diff --git a/code/recognization/data/11.png b/code/recognization/data/11.png new file mode 100644 index 0000000000000000000000000000000000000000..11e1b8ba39559b94b79e9c9dbf3664c55a770f4a Binary files /dev/null and b/code/recognization/data/11.png differ diff --git a/code/recognization/data/12.png b/code/recognization/data/12.png new file mode 100644 index 0000000000000000000000000000000000000000..ef9831effbd724280bce703ae3a831a10bc33dff Binary files /dev/null and b/code/recognization/data/12.png differ diff --git a/code/recognization/data/13.png b/code/recognization/data/13.png new file mode 100644 index 0000000000000000000000000000000000000000..2f172918a33b6f2c066ef50114a8331cbc3ae7aa Binary files /dev/null and b/code/recognization/data/13.png differ diff --git a/code/recognization/data/14.png b/code/recognization/data/14.png new file mode 
100644 index 0000000000000000000000000000000000000000..de999a280c19d9e3232f18723c50264ab26b941f Binary files /dev/null and b/code/recognization/data/14.png differ diff --git a/code/recognization/data/15.png b/code/recognization/data/15.png new file mode 100644 index 0000000000000000000000000000000000000000..d017a9f04b9c16c96c6d1fff4126dc3f53decd09 Binary files /dev/null and b/code/recognization/data/15.png differ diff --git a/code/recognization/data/16.png b/code/recognization/data/16.png new file mode 100644 index 0000000000000000000000000000000000000000..dbe64cbab6ebe6062ec0df625b04ab274e0136ea Binary files /dev/null and b/code/recognization/data/16.png differ diff --git a/code/recognization/data/2.png b/code/recognization/data/2.png new file mode 100644 index 0000000000000000000000000000000000000000..0c8360bc48b14d88ac08931e8fee4571feb68df7 Binary files /dev/null and b/code/recognization/data/2.png differ diff --git a/code/recognization/data/2_1.png b/code/recognization/data/2_1.png new file mode 100644 index 0000000000000000000000000000000000000000..65934d3b52b2c9846187c0f546574cec4757b91d Binary files /dev/null and b/code/recognization/data/2_1.png differ diff --git a/code/recognization/data/2_2.png b/code/recognization/data/2_2.png new file mode 100644 index 0000000000000000000000000000000000000000..cd65ca3d9a89d365e62b0074e9277e2994fb3450 Binary files /dev/null and b/code/recognization/data/2_2.png differ diff --git a/code/recognization/data/3.png b/code/recognization/data/3.png new file mode 100644 index 0000000000000000000000000000000000000000..79937c25582d171d3f2d434df38fa0ed7e454b64 Binary files /dev/null and b/code/recognization/data/3.png differ diff --git a/code/recognization/data/4.png b/code/recognization/data/4.png new file mode 100644 index 0000000000000000000000000000000000000000..8bfa8d1e3ee9c2c2e761a1138374423d2a14de8c Binary files /dev/null and b/code/recognization/data/4.png differ diff --git a/code/recognization/data/5.png 
b/code/recognization/data/5.png new file mode 100644 index 0000000000000000000000000000000000000000..8d9c702b8df2dceab649256d1bb8d8898b04e57d Binary files /dev/null and b/code/recognization/data/5.png differ diff --git a/code/recognization/data/6.png b/code/recognization/data/6.png new file mode 100644 index 0000000000000000000000000000000000000000..4bf454ecbd45de060de29f0e1e7fda49d124dfc1 Binary files /dev/null and b/code/recognization/data/6.png differ diff --git a/code/recognization/data/7.png b/code/recognization/data/7.png new file mode 100644 index 0000000000000000000000000000000000000000..bc2067af157757b83871d257cafb7b04435c55cc Binary files /dev/null and b/code/recognization/data/7.png differ diff --git a/code/recognization/data/8.png b/code/recognization/data/8.png new file mode 100644 index 0000000000000000000000000000000000000000..baae41841253de77caee0298aa19f37e75311e37 Binary files /dev/null and b/code/recognization/data/8.png differ diff --git a/code/recognization/data/9.png b/code/recognization/data/9.png new file mode 100644 index 0000000000000000000000000000000000000000..b494b962205b7818169e8b107b5bfa68d3e6c701 Binary files /dev/null and b/code/recognization/data/9.png differ diff --git a/code/recognization/dataset.py b/code/recognization/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5a81b10c8b207fe5916c7c8b447e72cf6f5bab42 --- /dev/null +++ b/code/recognization/dataset.py @@ -0,0 +1,319 @@ +""" +Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 +Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora +GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition +Project Website: https://abdur75648.github.io/UTRNet/ +Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial +4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) +""" + +import os +import sys +import re +import 
six +import math +import lmdb +import torch +import random +import numpy as np +from PIL import Image +import torchvision.transforms as T +#from torch._utils import _accumulate +from torch.utils.data import Dataset, ConcatDataset, Subset + +def accumulate(iterable): + total = 0 + for value in iterable: + total += value + yield total + + +class Batch_Balanced_Dataset(object): + + def __init__(self, opt, rand_aug = False): + """ + Modulate the data ratio in the batch. + For example, when select_data is "MJ-ST" and batch_ratio is "0.5-0.5", + the 50% of the batch is filled with MJ and the other 50% of the batch is filled with ST. + """ + log = open(f'./saved_models/{opt.exp_name}/log_dataset.txt', 'a',encoding="utf-8") + dashed_line = '-' * 80 + print(dashed_line) + log.write(dashed_line + '\n') + print(f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}') + log.write(f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}\n') + assert len(opt.select_data) == len(opt.batch_ratio) + + _AlignCollate = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD) + self.data_loader_list = [] + self.dataloader_iter_list = [] + batch_size_list = [] + Total_batch_size = 0 + for selected_d, batch_ratio_d in zip(opt.select_data, opt.batch_ratio): + _batch_size = max(round(opt.batch_size * float(batch_ratio_d)), 1) + print(dashed_line) + log.write(dashed_line + '\n') + _dataset, _dataset_log = hierarchical_dataset(root=opt.train_data, opt=opt, select_data=[selected_d], rand_aug=rand_aug) + total_number_dataset = len(_dataset) + log.write(_dataset_log) + + """ + The total number of data can be modified with opt.total_data_usage_ratio. + ex) opt.total_data_usage_ratio = 1 indicates 100% usage, and 0.2 indicates 20% usage. + See 4.2 section in our paper. 
+ """ + number_dataset = int(total_number_dataset * float(opt.total_data_usage_ratio)) + dataset_split = [number_dataset, total_number_dataset - number_dataset] + indices = range(total_number_dataset) + _dataset, _ = [Subset(_dataset, indices[offset - length:offset]) + for offset, length in zip(_accumulate(dataset_split), dataset_split)] + selected_d_log = f'num total samples of {selected_d}: {total_number_dataset} x {opt.total_data_usage_ratio} (total_data_usage_ratio) = {len(_dataset)}\n' + selected_d_log += f'num samples of {selected_d} per batch: {opt.batch_size} x {float(batch_ratio_d)} (batch_ratio) = {_batch_size}' + print(selected_d_log) + log.write(selected_d_log + '\n') + batch_size_list.append(str(_batch_size)) + Total_batch_size += _batch_size + + _data_loader = torch.utils.data.DataLoader( + _dataset, batch_size=_batch_size, + shuffle=True, + num_workers=0, + collate_fn=_AlignCollate, pin_memory=True) + self.data_loader_list.append(_data_loader) + self.dataloader_iter_list.append(iter(_data_loader)) + + Total_batch_size_log = f'{dashed_line}\n' + batch_size_sum = '+'.join(batch_size_list) + Total_batch_size_log += f'Total_batch_size: {batch_size_sum} = {Total_batch_size}\n' + Total_batch_size_log += f'{dashed_line}' + opt.batch_size = Total_batch_size + + print(Total_batch_size_log) + log.write(Total_batch_size_log + '\n') + log.close() + + def get_batch(self): + balanced_batch_images = [] + balanced_batch_texts = [] + + for i, data_loader_iter in enumerate(self.dataloader_iter_list): + try: + image, text = data_loader_iter.next() + balanced_batch_images.append(image) + balanced_batch_texts += text + except StopIteration: + self.dataloader_iter_list[i] = iter(self.data_loader_list[i]) + image, text = self.dataloader_iter_list[i].next() + balanced_batch_images.append(image) + balanced_batch_texts += text + except ValueError: + pass + + balanced_batch_images = torch.cat(balanced_batch_images, 0) + + return balanced_batch_images, balanced_batch_texts + + 
def hierarchical_dataset(root, opt, select_data='/', rand_aug=False):
    """Build a ConcatDataset from every LMDB leaf directory under *root*.

    select_data='/' matches all sub-directories of the root directory;
    otherwise only leaf directories whose path contains one of the given
    names are used. Returns (concatenated_dataset, log string).
    """
    dataset_list = []
    dataset_log = f'dataset_root: {root}\t dataset: {select_data[0]}'
    dataset_log += '\n'
    for dirpath, dirnames, filenames in os.walk(root + '/'):
        if not dirnames:  # leaf directory -> candidate LMDB environment
            select_flag = False
            for selected_d in select_data:
                if selected_d in dirpath:
                    select_flag = True
                    break

            if select_flag:
                dataset = LmdbDataset(dirpath, opt, rand_aug=rand_aug)
                sub_dataset_log = f'sub-directory:\t/{os.path.relpath(dirpath, root)}\t num samples: {len(dataset)}'
                dataset_log += f'{sub_dataset_log}\n'
                dataset_list.append(dataset)

    print(len(dataset_list))
    concatenated_dataset = ConcatDataset(dataset_list)

    return concatenated_dataset, dataset_log


class LmdbDataset(Dataset):
    """Dataset over an LMDB of ('image-%09d', 'label-%09d') pairs.

    Samples whose label is longer than opt.batch_max_length or contains a
    character outside opt.character are filtered out at construction time.
    """

    def __init__(self, root, opt, rand_aug=False, transform=None):
        self.root = root
        self.opt = opt
        self.rand_aug = rand_aug
        self.transform = transform
        self.env = lmdb.open(root, max_readers=32, readonly=True, lock=False, readahead=False, meminit=False)
        if not self.env:
            print('cannot create lmdb from %s' % (root))
            # BUG FIX: exit with a non-zero status on failure (0 means success).
            sys.exit(1)

        with self.env.begin(write=False) as txn:
            nSamples = int(txn.get('num-samples'.encode()))
            self.nSamples = nSamples
            self.filtered_index_list = []
            for index in range(self.nSamples):
                index += 1  # lmdb keys start at 1
                label_key = 'label-%09d'.encode() % index
                label = txn.get(label_key).decode('utf-8')

                if len(label) > self.opt.batch_max_length:
                    continue

                # By default, images containing characters which are not in
                # opt.character are filtered. An [UNK] token could be added to
                # `opt.character` in utils.py instead of this filtering.
                out_of_char = f'[^{self.opt.character}]'
                if re.search(out_of_char, label):
                    # BUG FIX: message typo ("dictionnary" -> "dictionary").
                    print("This string contains a character not part of our dictionary")
                    continue

                self.filtered_index_list.append(index)

            self.nSamples = len(self.filtered_index_list)
            if self.transform is None:
                self.transform = []
                if self.rand_aug:
                    from augmentation import rand_augment_transform, salt_and_pepper_noise, random_border_crop, random_resize
                    # NOTE(review): these random.random() draws happen once per
                    # dataset, not per sample - the augmentation set is frozen
                    # at construction time. Presumably intentional; confirm.
                    self.transform.append(rand_augment_transform())
                    self.transform.append(T.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.25))
                    if random.random() < 0.25:
                        self.transform.append(lambda img: salt_and_pepper_noise(img))
                    if random.random() < 0.25:
                        self.transform.append(lambda img: random_border_crop(img))
                    self.transform.append(T.RandomRotation(5))
                    if random.random() < 0.25:
                        self.transform.append(lambda img: random_resize(img))
                self.transform = T.Compose(self.transform)

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        # BUG FIX: the original asserted `index <= len(self)`, which accepts
        # index == len(self) and would IndexError on the lookup below.
        assert index < len(self), 'index range error'
        index = self.filtered_index_list[index]

        with self.env.begin(write=False) as txn:
            label_key = 'label-%09d'.encode() % index
            label = txn.get(label_key).decode('utf-8')
            img_key = 'image-%09d'.encode() % index
            imgbuf = txn.get(img_key)

            buf = six.BytesIO()
            buf.write(imgbuf)
            buf.seek(0)
            try:
                if self.opt.rgb:
                    img1 = Image.open(buf).convert('RGB')  # for color image
                    # Horizontal flip: Urdu reads right-to-left; presumably the
                    # recognizer is trained on mirrored images - confirm.
                    img = img1.transpose(Image.FLIP_LEFT_RIGHT)
                else:
                    img1 = Image.open(buf).convert('L')
                    img = img1.transpose(Image.FLIP_LEFT_RIGHT)

            except IOError:
                print(f'Corrupted image for {index}')
                # Make a dummy image and dummy label for the corrupted entry.
                if self.opt.rgb:
                    img = Image.new('RGB', (self.opt.imgW, self.opt.imgH))
                    img = img.transpose(Image.FLIP_LEFT_RIGHT)
                else:
                    img = Image.new('L', (self.opt.imgW, self.opt.imgH))
                    img = img.transpose(Image.FLIP_LEFT_RIGHT)
                label = '[dummy_label]'

            # Strip any character outside the model's character set.
            out_of_char = f'[^{self.opt.character}]'
            label = re.sub(out_of_char, '', label)

            if self.transform:
                img = self.transform(img)

        return (img, label)


class ResizeNormalize(object):
    """Resize a PIL image to a fixed size and normalize it to [-1, 1]."""

    def __init__(self, size, interpolation=Image.BICUBIC):
        self.size = size
        self.interpolation = interpolation
        self.toTensor = T.ToTensor()

    def __call__(self, img):
        img = img.resize(self.size, self.interpolation)
        img = self.toTensor(img)
        img.sub_(0.5).div_(0.5)  # map [0, 1] -> [-1, 1]
        return img


class NormalizePAD(object):
    """Normalize to [-1, 1] and right-pad to max_size = (C, H, W_max).

    The padding region repeats the last image column (border replication).
    """

    def __init__(self, max_size, PAD_type='right'):
        self.toTensor = T.ToTensor()
        self.max_size = max_size
        self.max_width_half = math.floor(max_size[2] / 2)
        self.PAD_type = PAD_type

    def __call__(self, img):
        img = self.toTensor(img)
        img.sub_(0.5).div_(0.5)
        c, h, w = img.size()
        Pad_img = torch.FloatTensor(*self.max_size).fill_(0)
        Pad_img[:, :, :w] = img  # right pad
        if self.max_size[2] != w:  # add border Pad
            Pad_img[:, :, w:] = img[:, :, w - 1].unsqueeze(2).expand(c, h, self.max_size[2] - w)

        return Pad_img


class AlignCollate(object):
    """Collate PIL images into one tensor, either padded (keep aspect) or resized."""

    def __init__(self, imgH=32, imgW=100, keep_ratio_with_pad=True):
        self.imgH = imgH
        self.imgW = imgW
        self.keep_ratio_with_pad = keep_ratio_with_pad

    def __call__(self, batch):
        batch = filter(lambda x: x is not None, batch)
        images, labels = zip(*batch)

        if self.keep_ratio_with_pad:  # same concept with 'Rosetta' paper
            resized_max_w = self.imgW
            input_channel = 3 if images[0].mode == 'RGB' else 1
            transform = NormalizePAD((input_channel, self.imgH, resized_max_w))

            resized_images = []
            for image in images:
                w, h = image.size
                ratio = w / float(h)
                # Cap the width at imgW; otherwise scale to preserve aspect.
                if math.ceil(self.imgH * ratio) > self.imgW:
                    resized_w = self.imgW
                else:
                    resized_w = math.ceil(self.imgH * ratio)

                resized_image = image.resize((resized_w, self.imgH), Image.BICUBIC)
                resized_images.append(transform(resized_image))

            image_tensors = torch.cat([t.unsqueeze(0) for t in resized_images], 0)

        else:
            transform = ResizeNormalize((self.imgW, self.imgH))
            image_tensors = [transform(image) for image in images]
            image_tensors = torch.cat([t.unsqueeze(0) for t in image_tensors], 0)

        return image_tensors, labels


def tensor2im(image_tensor, imtype=np.uint8):
    """Convert a CHW tensor in [-1, 1] to an HWC uint8 numpy image."""
    image_numpy = image_tensor.cpu().float().numpy()
    if image_numpy.shape[0] == 1:
        # Grayscale -> 3 identical channels.
        image_numpy = np.tile(image_numpy, (3, 1, 1))
    image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
    return image_numpy.astype(imtype)


def save_image(image_numpy, image_path):
    """Save an HWC numpy image to *image_path* via PIL."""
    image_pil = Image.fromarray(image_numpy)
    image_pil.save(image_path)
class Single_level_densenet(nn.Module):
    """One dense stage: num_conv 3x3 convs with additive dense skip connections.

    Channel count and spatial size are unchanged.
    """

    def __init__(self, filters, num_conv=4):
        super(Single_level_densenet, self).__init__()
        self.num_conv = num_conv
        self.conv_list = nn.ModuleList()
        self.bn_list = nn.ModuleList()
        for i in range(self.num_conv):
            self.conv_list.append(nn.Conv2d(filters, filters, 3, padding=1))
            self.bn_list.append(nn.BatchNorm2d(filters))

    def forward(self, x):
        outs = []
        outs.append(x)
        for i in range(self.num_conv):
            temp_out = self.conv_list[i](outs[i])
            if i > 0:
                # Dense connectivity: add the activations of all earlier layers.
                for j in range(i):
                    temp_out += outs[j]
            outs.append(F.relu(self.bn_list[i](temp_out)))
        out_final = outs[-1]
        del outs
        return out_final


class Down_sample(nn.Module):
    """2x max-pool; returns (pooled, pre-pool) so the encoder keeps skip features."""

    def __init__(self, kernel_size=2, stride=2):
        super(Down_sample, self).__init__()
        self.down_sample_layer = nn.MaxPool2d(kernel_size, stride)

    def forward(self, x):
        y = self.down_sample_layer(x)
        return y, x


class Upsample_n_Concat(nn.Module):
    """Transposed-conv 2x upsample, concat with the skip tensor, then 3x3 conv."""

    def __init__(self, filters):
        super(Upsample_n_Concat, self).__init__()
        self.upsample_layer = nn.ConvTranspose2d(filters, filters, 4, padding=1, stride=2)
        self.conv = nn.Conv2d(2 * filters, filters, 3, padding=1)
        self.bn = nn.BatchNorm2d(filters)

    def forward(self, x, y):
        x = self.upsample_layer(x)
        x = torch.cat([x, y], dim=1)
        x = F.relu(self.bn(self.conv(x)))
        return x


class DenseNet(nn.Module):
    """Dense-UNet: 4-level encoder/decoder of dense blocks with skip connections.

    Input (N, in_chan, H, W) -> output (N, out_chan, H, W); H and W must be
    divisible by 16 (four 2x poolings).
    """

    def __init__(self, in_chan=1, out_chan=512, filters=256, num_conv=4):
        super(DenseNet, self).__init__()
        self.conv1 = nn.Conv2d(in_chan, filters, 1)
        self.d1 = Single_level_densenet(filters, num_conv)
        self.down1 = Down_sample()
        self.d2 = Single_level_densenet(filters, num_conv)
        self.down2 = Down_sample()
        self.d3 = Single_level_densenet(filters, num_conv)
        self.down3 = Down_sample()
        self.d4 = Single_level_densenet(filters, num_conv)
        self.down4 = Down_sample()
        self.bottom = Single_level_densenet(filters, num_conv)
        self.up4 = Upsample_n_Concat(filters)
        self.u4 = Single_level_densenet(filters, num_conv)
        self.up3 = Upsample_n_Concat(filters)
        self.u3 = Single_level_densenet(filters, num_conv)
        self.up2 = Upsample_n_Concat(filters)
        self.u2 = Single_level_densenet(filters, num_conv)
        self.up1 = Upsample_n_Concat(filters)
        self.u1 = Single_level_densenet(filters, num_conv)
        self.outconv = nn.Conv2d(filters, out_chan, 1)

    def forward(self, x):
        x = self.conv1(x)
        x, y1 = self.down1(self.d1(x))
        # BUG FIX: the original reused self.down1 for all four stages, leaving
        # down2/down3/down4 registered but unused. Behavior is unchanged here
        # because Down_sample is a stateless max-pool, but using the dedicated
        # modules keeps the graph consistent with the declared architecture.
        x, y2 = self.down2(self.d2(x))
        x, y3 = self.down3(self.d3(x))
        x, y4 = self.down4(self.d4(x))
        x = self.bottom(x)
        x = self.u4(self.up4(x, y4))
        x = self.u3(self.up3(x, y3))
        x = self.u2(self.up2(x, y2))
        x = self.u1(self.up1(x, y1))
        x1 = self.outconv(x)
        return x1
class dropout_layer(nn.Module):
    """Channel dropout: zeroes each channel with probability 0.2.

    Surviving channels are NOT rescaled by 1/keep_prob, and the mask is drawn
    with numpy, so torch.manual_seed does not control it.
    """

    def __init__(self, device):
        super(dropout_layer, self).__init__()
        self.device = device

    def forward(self, input):
        # One Bernoulli(keep=0.8) draw per channel (dim 1) of an (N, C, W) input.
        keep = (np.random.rand(input.shape[1]) > 0.2).astype(int)
        mask = torch.from_numpy(keep).to(self.device)
        # Column vector (C, 1), then tiled to the full (N, C, W) input shape.
        mask = torch.reshape(mask, (input.shape[1], 1)).to(self.device)
        mask = mask.repeat(input.shape[0], 1, input.shape[2]).to(self.device)
        # Element-wise multiply: dropped channels become all zeros.
        return input * mask


class DenseNet_FeatureExtractor(nn.Module):
    """Expose DenseNet through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(DenseNet_FeatureExtractor, self).__init__()
        self.ConvNet = DenseNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class HRNet_FeatureExtractor(nn.Module):
    """Expose HRNet through the common feature-extractor interface.

    Note the smaller default output_channel (32) compared to the other backbones.
    """

    def __init__(self, input_channel=1, output_channel=32):
        super(HRNet_FeatureExtractor, self).__init__()
        self.ConvNet = HRNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class InceptionUNet_FeatureExtractor(nn.Module):
    """Expose InceptionUNet through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(InceptionUNet_FeatureExtractor, self).__init__()
        self.ConvNet = InceptionUNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class RCNN_FeatureExtractor(nn.Module):
    """Expose RCNN through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(RCNN_FeatureExtractor, self).__init__()
        self.ConvNet = RCNN(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class ResNet_FeatureExtractor(nn.Module):
    """Expose ResNet through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(ResNet_FeatureExtractor, self).__init__()
        self.ConvNet = ResNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class ResUnet_FeatureExtractor(nn.Module):
    """Expose ResUnet through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(ResUnet_FeatureExtractor, self).__init__()
        self.ConvNet = ResUnet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class AttnUNet_FeatureExtractor(nn.Module):
    """Expose AttnUNet through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(AttnUNet_FeatureExtractor, self).__init__()
        self.ConvNet = AttnUNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class UNet_FeatureExtractor(nn.Module):
    """Expose UNet through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(UNet_FeatureExtractor, self).__init__()
        self.ConvNet = UNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


class UNetPlusPlus_FeatureExtractor(nn.Module):
    """Expose NestedUNet (UNet++) through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(UNetPlusPlus_FeatureExtractor, self).__init__()
        self.ConvNet = NestedUNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)
class VGG_FeatureExtractor(nn.Module):
    """Expose VGG through the common feature-extractor interface."""

    def __init__(self, input_channel=1, output_channel=512):
        super(VGG_FeatureExtractor, self).__init__()
        self.ConvNet = VGG(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)


# ---------------------------------------------------------------------------
# HRNet (https://arxiv.org/pdf/1908.07919.pdf), adapted from
# https://github.com/shuuchen/HRNet/blob/master/hrnet.py
# ---------------------------------------------------------------------------

BN_MOMENTUM = 0.1


class Conv(nn.Module):
    """conv -> BN (-> optional ReLU); padding keeps the spatial size at stride 1."""

    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, relued=True):
        super(Conv, self).__init__()
        padding = (kernel_size - 1) // 2
        self.conv_bn = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM))
        self.relu = nn.ReLU()
        self.relued = relued

    def forward(self, x):
        out = self.conv_bn(x)
        return self.relu(out) if self.relued else out


class BasicBlock(nn.Module):
    """Residual block of two 3x3 convs; shape and channel count unchanged."""

    def __init__(self, in_ch, out_ch):
        super(BasicBlock, self).__init__()
        self.conv = nn.Sequential(
            Conv(in_ch, out_ch),
            Conv(in_ch, out_ch, relued=False))
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.conv(x) + x)


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block expanding channels to 4 * out_ch."""

    expansion = 4

    def __init__(self, in_ch, out_ch, downsampling=None):
        super(Bottleneck, self).__init__()
        self.conv = nn.Sequential(
            Conv(in_ch, out_ch, kernel_size=1),
            Conv(out_ch, out_ch),
            Conv(out_ch, out_ch * self.expansion, kernel_size=1, relued=False))
        self.relu = nn.ReLU()
        self.downsampling = downsampling

    def forward(self, x):
        out = self.conv(x)
        # Project the shortcut when the channel counts do not match.
        shortcut = self.downsampling(x) if self.downsampling else x
        return self.relu(out + shortcut)


class UpSampling(nn.Module):
    """Bilinear upsample by up_factor; channels divided by up_factor via 1x1 conv."""

    def __init__(self, ch, up_factor):
        super(UpSampling, self).__init__()
        self.up_sampling = nn.Sequential(
            nn.Upsample(scale_factor=up_factor, mode='bilinear', align_corners=False),
            Conv(ch, ch // up_factor, 1, relued=False))

    def forward(self, x):
        return self.up_sampling(x)


class DownSampling(nn.Module):
    """Chain of stride-2 convs: each halves the spatial size and doubles channels."""

    def __init__(self, ch, num_samplings):
        super(DownSampling, self).__init__()
        stages = []
        for i in range(num_samplings):
            # Only the last conv of the chain skips its ReLU.
            stages.append(Conv(ch, ch * 2, 3, 2, relued=i < num_samplings - 1))
            ch *= 2
        self.down_sampling = nn.Sequential(*stages)

    def forward(self, x):
        return self.down_sampling(x)


class HRBlock(nn.Module):
    """One HRNet stage over `index` parallel streams (stream i has ch * 2**i channels).

    Each stream is refined by its own stack of `block`s, then all streams are
    cross-fused (bilinear upsampling for low->high, strided convs for
    high->low). Unless this is the last stage, an additional lower-resolution
    stream is spawned from the sum of all downsampled streams.
    """

    def __init__(self, ch, index, last_stage, block, num_conv_block_per_list=4):
        super(HRBlock, self).__init__()
        self.index = index
        self.last_stage = last_stage
        self.num_conv_block_per_list = num_conv_block_per_list
        self.relu = nn.ReLU()

        # Per-stream refinement stacks.
        self.parallel_conv_lists = nn.ModuleList()
        for i in range(index):
            ch_i = ch * 2 ** i
            self.parallel_conv_lists.append(nn.Sequential(
                *[block(ch_i, ch_i) for _ in range(num_conv_block_per_list)]))

        # up_conv_lists[i][k] brings stream i+1+k up to stream i's resolution.
        self.up_conv_lists = nn.ModuleList()
        for i in range(index - 1):
            ups = nn.ModuleList()
            for j in range(i + 1, index):
                ups.append(UpSampling(ch * 2 ** j, 2 ** (j - i)))
            self.up_conv_lists.append(ups)

        # down_conv_lists[i-1][j] brings stream j down to stream i's resolution;
        # the extra trailing entry (non-last stages) feeds the new stream.
        self.down_conv_lists = nn.ModuleList()
        for i in range(1, index if last_stage else index + 1):
            downs = nn.ModuleList()
            for j in range(i):
                downs.append(DownSampling(ch * 2 ** j, i - j))
            self.down_conv_lists.append(downs)

    def forward(self, x_list):
        # Refine every resolution stream independently.
        branch_outs = [self.parallel_conv_lists[i](x_list[i]) for i in range(self.index)]

        # Fuse streams; spawn one extra low-resolution stream except at the
        # last stage (where index is already the maximum, 4).
        num_outputs = self.index if self.last_stage else self.index + 1
        fused = []
        for i in range(num_outputs):
            if i == self.index:
                # Newly spawned stream: sum of every branch downsampled to it.
                acc = 0
                for t, m in zip(branch_outs, self.down_conv_lists[-1]):
                    acc = acc + m(t)
            else:
                acc = branch_outs[i]
                if i != self.index - 1:
                    # Fold in every lower-resolution branch, upsampled to stream i.
                    up_acc = 0
                    for t, m in zip(branch_outs[i + 1:], self.up_conv_lists[i]):
                        up_acc = up_acc + m(t)
                    acc = acc + up_acc
                if i != 0:
                    # Fold in every higher-resolution branch, downsampled to stream i.
                    down_acc = 0
                    for t, m in zip(branch_outs[:i], self.down_conv_lists[i - 1]):
                        down_acc = down_acc + m(t)
                    acc = acc + down_acc
            fused.append(self.relu(acc))
        return fused


class HRNet(nn.Module):
    """High-resolution backbone: out_ch feature maps at full input resolution.

    A stem (two 1x1 convs + a bottleneck stage) feeds num_stage
    multi-resolution stages; all streams are finally upsampled to full
    resolution, concatenated and projected to out_ch channels.
    """

    def __init__(self, in_ch=1, out_ch=32, mid_ch=64, num_stage=4):
        super(HRNet, self).__init__()
        self.init_conv = nn.Sequential(
            Conv(in_ch, 64, 1),
            Conv(64, 64, 1))
        # The concatenated streams carry mid_ch * (1 + 2 + 4 + 8) channels.
        self.head = nn.Sequential(
            Conv(mid_ch * (1 + 2 + 4 + 8), mid_ch * (1 + 2 + 4 + 8), 1),
            nn.Conv2d(mid_ch * (1 + 2 + 4 + 8), out_ch, 1))
        self.first_layer = self._make_layer(64, 64, Bottleneck, 4)
        self.first_transition = self._make_transition_layer(256, mid_ch, 1)
        self.num_stage = num_stage
        self.hr_blocks = nn.ModuleList()
        for i in range(1, num_stage):
            self.hr_blocks.append(HRBlock(mid_ch, i + 1, i == num_stage - 1, BasicBlock))

        self.up_samplings = nn.ModuleList()
        for i in range(num_stage - 1):
            self.up_samplings.append(
                nn.Upsample(scale_factor=2 ** (i + 1), mode='bilinear'))

        # Reference-implementation weight init.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.001)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, in_ch, ch, block, num):
        """Stack `num` blocks; the first gets a projection shortcut if needed."""
        downsampling = None
        if in_ch != ch * block.expansion:
            downsampling = Conv(in_ch, ch * block.expansion, 1, relued=False)
        layers = [block(in_ch, ch, downsampling)]
        for _ in range(1, num):
            layers.append(block(ch * block.expansion, ch))
        return nn.Sequential(*layers)

    def _make_transition_layer(self, in_ch, out_ch, stage):
        """Split the stem output into a full-resolution and a half-resolution stream.

        `stage` is unused but kept for interface compatibility with callers.
        """
        layers = nn.ModuleList()
        layers.append(Conv(in_ch, out_ch, 1))
        layers.append(Conv(in_ch, out_ch * 2, 3, 2))
        return layers

    def forward(self, x):
        x = self.init_conv(x)
        x = self.first_layer(x)
        streams = [m(x) for m in self.first_transition]
        for i in range(self.num_stage - 1):
            streams = self.hr_blocks[i](streams)

        # Upsample every lower-resolution stream to full resolution and fuse.
        res_list = [streams[0]]
        for t, m in zip(streams[1:], self.up_samplings):
            res_list.append(m(t))
        return self.head(torch.cat(res_list, dim=1))
class InceptionConv(nn.Module):
    """Inception-style downsampling block: four parallel branches, each
    starting with a 2x2 max-pool, whose outputs are concatenated on the
    channel axis (3x3 double conv, 5x5 double conv, 1x1 single conv,
    3x3 -> 1x1 double conv)."""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels

        def _unit(cin, cout, k):
            # conv => BN => ReLU, with "same" padding for odd kernel sizes
            return [nn.Conv2d(cin, cout, kernel_size=k, padding=k // 2),
                    nn.BatchNorm2d(cout),
                    nn.ReLU(inplace=True)]

        self.double_conv1 = nn.Sequential(nn.MaxPool2d(2),
                                          *_unit(in_channels, mid_channels, 3),
                                          *_unit(mid_channels, out_channels, 3))
        self.double_conv2 = nn.Sequential(nn.MaxPool2d(2),
                                          *_unit(in_channels, mid_channels, 5),
                                          *_unit(mid_channels, out_channels, 5))
        self.double_conv3 = nn.Sequential(nn.MaxPool2d(2),
                                          *_unit(in_channels, mid_channels, 1))
        self.double_conv4 = nn.Sequential(nn.MaxPool2d(2),
                                          *_unit(in_channels, mid_channels, 3),
                                          *_unit(mid_channels, out_channels, 1))

    def forward(self, x):
        branches = (self.double_conv1, self.double_conv2,
                    self.double_conv3, self.double_conv4)
        return torch.cat([branch(x) for branch in branches], 1)


class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) applied twice at constant resolution."""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Encoder step: 2x2 max-pool followed by a DoubleConv."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels),
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class UpInception(nn.Module):
    """Decoder step with a triple input: upscale the decoder state (x1) and
    the inception-branch feature (x3), pad x1 to the skip connection (x2),
    concatenate [x3, x2, x1] and run a DoubleConv."""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        if bilinear:
            # Bilinear upsampling; the following conv reduces channel count.
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2, x3):
        x1 = self.up(x1)
        x3 = self.up(x3)
        # Input is CHW: pad x1 so its spatial size matches the skip x2.
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        return self.conv(torch.cat([x3, x2, x1], dim=1))


class OutConv(nn.Module):
    """Final 1x1 convolution mapping decoder features to output channels."""

    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)


class InceptionUNet(nn.Module):
    """U-Net with a parallel Inception side-chain.

    A standard U-Net encoder/decoder is augmented by a chain of InceptionConv
    blocks that repeatedly downsample x1; each decoder stage concatenates the
    upsampled decoder state, the encoder skip and the matching inception
    feature (see UpInception).
    """

    def __init__(self, n_channels=1, out_channels=512, bilinear=True):
        super(InceptionUNet, self).__init__()
        self.n_channels = n_channels
        self.out_channels = out_channels
        self.bilinear = bilinear

        # Inception side-chain (each block halves the resolution).
        self.block1 = InceptionConv(64, 32)
        self.block2 = InceptionConv(128, 64)
        self.block3 = InceptionConv(256, 128)
        self.block4 = InceptionConv(512, 128)

        # U-Net encoder.
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)

        # Decoder; in_channels = decoder state + skip + inception branch.
        self.up1 = UpInception(1024 + 512, 256 // factor, bilinear)
        self.up2 = UpInception(896, 128 // factor, bilinear)
        self.up3 = UpInception(448, 32 // factor, bilinear)
        self.up4 = UpInception(208, 16, bilinear)
        self.outc = OutConv(16, out_channels)

    def forward(self, x):
        # Encoder path.
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)

        # Inception side-chain.
        b1 = self.block1(x1)
        b2 = self.block2(b1)
        b3 = self.block3(b2)
        b4 = self.block4(b3)

        # Decoder with triple-input skips.
        y = self.up1(x5, x4, b4)
        y = self.up2(y, x3, b3)
        y = self.up3(y, x2, b2)
        y = self.up4(y, x1, b1)
        return self.outc(y)
class Text_recognization_model(nn.Module):
    """Three-stage text recognizer.

    Stage 1 extracts visual features with a configurable CNN backbone,
    stage 2 models the horizontal (width) axis as a sequence — with temporal
    dropout, reused as Monte Carlo dropout at inference — and stage 3
    predicts characters with either a CTC linear head or an attention
    decoder. `opt` carries the whole configuration (stage names, channel and
    hidden sizes, device, num_class, batch_max_length).
    """

    def __init__(self, opt):
        super(Text_recognization_model, self).__init__()
        self.opt = opt
        self.stages = {'Feat': opt.FeatureExtraction,
                       'Seq': opt.SequenceModeling,
                       'Pred': opt.Prediction}

        """ FeatureExtraction """
        backbones = {
            'HRNet': HRNet_FeatureExtractor,  # maintains high-resolution feature maps
            'Densenet': DenseNet_FeatureExtractor,
            'InceptionUnet': InceptionUNet_FeatureExtractor,
            'RCNN': RCNN_FeatureExtractor,
            'ResNet': ResNet_FeatureExtractor,
            'ResUnet': ResUnet_FeatureExtractor,
            'AttnUNet': AttnUNet_FeatureExtractor,
            'UNet': UNet_FeatureExtractor,
            'UnetPlusPlus': UNetPlusPlus_FeatureExtractor,
            'VGG': VGG_FeatureExtractor,
        }
        if opt.FeatureExtraction not in backbones:
            raise Exception('No FeatureExtraction module specified')
        self.FeatureExtraction = backbones[opt.FeatureExtraction](opt.input_channel, opt.output_channel)
        self.FeatureExtraction_output = opt.output_channel
        # Collapse the feature-map height to 1 so width becomes the time axis.
        self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1))

        """ Temporal Dropout (five independent layers for MC-dropout averaging) """
        self.dropout1 = dropout_layer(opt.device)
        self.dropout2 = dropout_layer(opt.device)
        self.dropout3 = dropout_layer(opt.device)
        self.dropout4 = dropout_layer(opt.device)
        self.dropout5 = dropout_layer(opt.device)

        """ Sequence modeling """
        feat = self.FeatureExtraction_output
        hid = opt.hidden_size
        if opt.SequenceModeling == 'LSTM':
            self.SequenceModeling = LSTM(feat, hid, hid)
        elif opt.SequenceModeling == 'GRU':
            self.SequenceModeling = GRU(feat, hid, hid)
        elif opt.SequenceModeling == 'MDLSTM':
            self.SequenceModeling = MDLSTM(feat, hid, hid)
        elif opt.SequenceModeling == 'BiLSTM':
            self.SequenceModeling = BidirectionalLSTM(feat, hid, hid)
        elif opt.SequenceModeling == 'DBiLSTM':
            # Two stacked bidirectional LSTMs.
            self.SequenceModeling = nn.Sequential(
                BidirectionalLSTM(feat, hid, hid),
                BidirectionalLSTM(hid, hid, hid))
        else:
            raise Exception('No Sequence Modeling module specified')
        self.SequenceModeling_output = hid

        """ Prediction """
        if opt.Prediction == 'CTC':
            self.Prediction = nn.Linear(self.SequenceModeling_output, opt.num_class)
        elif opt.Prediction == 'Attn':
            self.Prediction = Attention(self.SequenceModeling_output, hid, opt.num_class, opt.device)
        else:
            raise Exception('Prediction is neither CTC or Attn')

    def forward(self, input, text=None, is_train=True):
        """Run the full pipeline; `text` is required only for the Attn head."""
        """ Feature extraction stage """
        visual_feature = self.FeatureExtraction(input)
        # [b, c, h, w] -> [b, w, c, h], then pool the height axis down to 1.
        visual_feature = self.AdaptiveAvgPool(visual_feature.permute(0, 3, 1, 2))
        visual_feature = visual_feature.squeeze(3)  # -> [b, w, c]

        """ Temporal Dropout + Sequence modeling stage """
        if self.training:
            contextual_feature = self.SequenceModeling(self.dropout1(visual_feature))
        else:
            # Inference: Monte Carlo dropout — average five stochastic passes.
            drops = (self.dropout1, self.dropout2, self.dropout3,
                     self.dropout4, self.dropout5)
            passes = [self.SequenceModeling(d(visual_feature)) for d in drops]
            # Same right-to-left summation order as the original nested adds.
            acc = passes[-1]
            for p in reversed(passes[:-1]):
                acc = p.add(acc)
            contextual_feature = acc * (1 / 5)

        """ Prediction stage """
        if self.stages['Pred'] == 'CTC':
            prediction = self.Prediction(contextual_feature.contiguous())
        else:
            if text is None:
                raise Exception('Input text (for prediction) to model is None')
            text = text.to(self.opt.device)
            prediction = self.Prediction(contextual_feature, text, is_train,
                                         batch_max_length=self.opt.batch_max_length)

        return prediction
def validation(model, criterion, evaluation_loader, converter, opt, device):
    """Evaluate `model` over `evaluation_loader`.

    Returns a 4-tuple: (average validation loss, word-level accuracy in
    percent, length-weighted ICDAR2019 normalized edit distance, eval_arr)
    where eval_arr is a list of [gt, pred, per-sample ED] rows.
    """
    eval_arr = []
    sum_len_gt = 0          # total ground-truth characters (ED weighting)
    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(tqdm(evaluation_loader)):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # Max-length tensors used by the attention decoder during prediction.
        length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

        text_for_loss, length_for_loss = converter.encode(labels, batch_max_length=opt.batch_max_length)

        start_time = time.time()
        if 'CTC' in opt.Prediction:
            preds = model(image)
            forward_time = time.time() - start_time
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            # CTC loss expects (T, N, C) log-probabilities.
            cost = criterion(preds.log_softmax(2).permute(1, 0, 2), text_for_loss, preds_size, length_for_loss)
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index.data, preds_size.data)
        else:
            preds = model(image, text=text_for_pred, is_train=False)
            forward_time = time.time() - start_time

            preds = preds[:, :text_for_loss.shape[1] - 1, :].to(device)
            target = text_for_loss[:, 1:].to(device)  # without [GO] symbol
            cost = criterion(preds.contiguous().view(-1, preds.shape[-1]), target.contiguous().view(-1))
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # Accuracy and confidence-score bookkeeping.
        preds_prob = F.softmax(preds, dim=2)
        preds_max_prob, _ = preds_prob.max(dim=2)
        confidence_score_list = []
        for gt, pred, pred_max_prob in zip(labels, preds_str, preds_max_prob):
            if 'Attn' in opt.Prediction:
                gt = gt[:gt.find('[s]')]
                pred_EOS = pred.find('[s]')
                pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

            if pred == gt:
                n_correct += 1

            # ICDAR2019 normalized edit distance (denominator = longer string).
            if len(gt) == 0 or len(pred) == 0:
                ED = 0
            elif len(gt) > len(pred):
                ED = 1 - edit_distance(pred, gt) / len(gt)
            else:
                ED = 1 - edit_distance(pred, gt) / len(pred)

            eval_arr.append([gt, pred, ED])
            sum_len_gt += len(gt)
            norm_ED += (ED * len(gt))

            # Confidence score = product of per-step max probabilities.
            # Narrowed from a bare `except:`; an empty prediction (everything
            # pruned after [s]) only raises IndexError on the [-1] access.
            try:
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
            except IndexError:
                confidence_score = 0  # empty pred case
            confidence_score_list.append(confidence_score)

    # Guard both divisions: an empty loader or all-empty ground truth would
    # previously raise ZeroDivisionError.
    accuracy = (n_correct / float(length_of_data) * 100) if length_of_data else 0.0
    norm_ED = (norm_ED / float(sum_len_gt)) if sum_len_gt else 0.0

    return valid_loss_avg.val(), accuracy, norm_ED, eval_arr


def test(opt, device):
    """Load a saved recognizer, evaluate it on opt.eval_data, and log the
    accuracy / normalized edit distance (plus an optional ED histogram)."""
    opt.device = device
    os.makedirs("test_outputs", exist_ok=True)
    datetime_now = str(datetime.now(pytz.timezone('Asia/Kolkata')).strftime("%Y-%m-%d_%H-%M-%S"))
    logger = Logger(f'test_outputs/{datetime_now}.txt')

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    # NOTE(review): `Model` is unresolved here — the `from model import Model`
    # line at the top of this file is commented out. Restore that import (and
    # confirm the class name model.py actually exports) before running this script.
    model = Model(opt)
    logger.log('model input parameters', opt.imgH, opt.imgW, opt.input_channel, opt.output_channel,
               opt.hidden_size, opt.num_class, opt.batch_max_length, opt.FeatureExtraction,
               opt.SequenceModeling, opt.Prediction)
    model = model.to(device)

    # Load trained weights.
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))
    logger.log('Loaded pretrained model from %s' % opt.saved_model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)  # ignore [GO] token = ignore index 0

    """ evaluation """
    model.eval()
    with torch.no_grad():
        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)  # , keep_ratio_with_pad=opt.PAD)
        eval_data, eval_data_log = hierarchical_dataset(root=opt.eval_data, opt=opt, rand_aug=False)
        logger.log(eval_data_log)
        evaluation_loader = torch.utils.data.DataLoader(
            eval_data, batch_size=opt.batch_size,
            shuffle=False,
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_evaluation, pin_memory=True)
        _, accuracy, norm_ED, eval_arr = validation(model, criterion, evaluation_loader, converter, opt, device)
        logger.log("=" * 20)
        logger.log(f'Accuracy : {accuracy:0.4f}\n')
        logger.log(f'Norm_ED : {norm_ED:0.4f}\n')
        logger.log("=" * 20)

    if opt.visualize:
        # Log every sample whose ED (percent) is at or below the threshold,
        # then save a histogram of the per-sample EDs.
        logger.log("Threshold - ", opt.threshold)
        logger.log("ED", "\t", "gt", "\t", "pred")
        arr = []
        for gt, pred, ED in eval_arr:
            ED = ED * 100.0
            arr.append(ED)
            if ED <= (opt.threshold):
                logger.log(ED, "\t", gt, "\t", pred)
        plt.hist(arr, edgecolor="red")
        plt.savefig('test_outputs/' + str(datetime_now) + ".png")
        plt.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--visualize', action='store_true', help='for visualization of bad samples')
    parser.add_argument('--threshold', type=float, help='Save samples below this threshold in txt file', default=50.0)
    parser.add_argument('--eval_data', required=True, help='path to evaluation dataset')
    parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
    parser.add_argument('--batch_size', type=int, default=32, help='input batch size')
    parser.add_argument('--saved_model', required=True, help="path to saved_model to evaluation")
    """ Data processing """
    parser.add_argument('--batch_max_length', type=int, default=100, help='maximum-label-length')
    parser.add_argument('--imgH', type=int, default=32, help='the height of the input image')
    parser.add_argument('--imgW', type=int, default=400, help='the width of the input image')
    parser.add_argument('--rgb', action='store_true', help='use rgb input')
    """ Model Architecture """
    parser.add_argument('--FeatureExtraction', type=str, default="HRNet",  # required=True,
                        help='FeatureExtraction stage VGG|RCNN|ResNet|UNet|HRNet|Densenet|InceptionUnet|ResUnet|AttnUNet|UNet|VGG')
    parser.add_argument('--SequenceModeling', type=str, default="DBiLSTM",  # required=True,
                        help='SequenceModeling stage LSTM|GRU|MDLSTM|BiLSTM|DBiLSTM')
    parser.add_argument('--Prediction', type=str, default="CTC",  # required=True,
                        help='Prediction stage CTC|Attn')
    parser.add_argument('--input_channel', type=int, default=1, help='the number of input channel of Feature extractor')
    parser.add_argument('--output_channel', type=int, default=512, help='the number of output channel of Feature extractor')
    parser.add_argument('--hidden_size', type=int, default=256, help='the size of the LSTM hidden state')
    """ GPU Selection """
    parser.add_argument('--device_id', type=str, default=None, help='cuda device ID')

    opt = parser.parse_args()
    # The HRNet backbone emits 32 channels, overriding the CLI default.
    if opt.FeatureExtraction == "HRNet":
        opt.output_channel = 32

    # Fix random seeds for both numpy and pytorch.
    seed = 1111
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    """ vocab / character number configuration """
    # Use a context manager so the glyphs file is always closed (was leaked).
    with open("UrduGlyphs.txt", "r", encoding="utf-8") as glyphs_file:
        content = ''.join(str(elem).strip('\n') for elem in glyphs_file.readlines())
    opt.character = content + " "

    cuda_str = 'cuda'
    if opt.device_id is not None:
        cuda_str = f'cuda:{opt.device_id}'
    device = torch.device(cuda_str if torch.cuda.is_available() else 'cpu')
    print("Device : ", device)

    test(opt, device)
class Attention(nn.Module):
    """Attention-based sequence decoder over encoder hidden states.

    Uses teacher forcing during training and greedy feedback decoding at
    inference. Each step runs an AttentionCell and a linear generator over
    the resulting hidden state.
    """

    def __init__(self, input_size, hidden_size, num_classes, device):
        super(Attention, self).__init__()
        self.attention_cell = AttentionCell(input_size, hidden_size, num_classes)
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.generator = nn.Linear(hidden_size, num_classes)
        self.device = device

    def _char_to_onehot(self, input_char, onehot_dim=38):
        """Encode a batch of character indices as one-hot rows of width onehot_dim."""
        indices = input_char.unsqueeze(1)
        one_hot = torch.zeros(indices.size(0), onehot_dim, device=self.device)
        return one_hot.scatter_(1, indices, 1)

    def forward(self, batch_H, text, is_train=True, batch_max_length=25):
        """
        input:
            batch_H : encoder hidden states H. [batch_size x num_steps x contextual_feature_channels]
            text : the text-index of each image. [batch_size x (max_length+1)]. +1 for [GO] token. text[:, 0] = [GO].
        output: probability distribution at each step [batch_size x num_steps x num_classes]
        """
        batch_size = batch_H.size(0)
        num_steps = batch_max_length + 1  # +1 for the trailing [s] token

        hidden = (torch.zeros(batch_size, self.hidden_size, device=self.device),
                  torch.zeros(batch_size, self.hidden_size, device=self.device))

        if is_train:
            # Teacher forcing: feed the ground-truth previous character.
            output_hiddens = torch.zeros(batch_size, num_steps, self.hidden_size, device=self.device)
            for step in range(num_steps):
                onehots = self._char_to_onehot(text[:, step], onehot_dim=self.num_classes)
                # hidden: decoder state s_{t-1}; batch_H: encoder states; onehots: one-hot(y_{t-1})
                hidden, _ = self.attention_cell(hidden, batch_H, onehots)
                output_hiddens[:, step, :] = hidden[0]  # LSTM tuple is (hidden, cell)
            probs = self.generator(output_hiddens)
        else:
            # Greedy decoding: feed back the argmax of the previous step.
            targets = torch.zeros(batch_size, dtype=torch.long, device=self.device)  # [GO] token
            probs = torch.zeros(batch_size, num_steps, self.num_classes, device=self.device)
            for step in range(num_steps):
                onehots = self._char_to_onehot(targets, onehot_dim=self.num_classes)
                hidden, _ = self.attention_cell(hidden, batch_H, onehots)
                step_logits = self.generator(hidden[0])
                probs[:, step, :] = step_logits
                targets = step_logits.max(1)[1]

        return probs  # batch_size x num_steps x num_classes


class AttentionCell(nn.Module):
    """One decoding step: additive attention over batch_H, then an LSTM cell."""

    def __init__(self, input_size, hidden_size, num_embeddings):
        super(AttentionCell, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size, bias=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)  # only one of i2h/h2h needs a bias
        self.score = nn.Linear(hidden_size, 1, bias=False)
        self.rnn = nn.LSTMCell(input_size + num_embeddings, hidden_size)
        self.hidden_size = hidden_size

    def forward(self, prev_hidden, batch_H, char_onehots):
        # Additive attention: score each encoder step against the previous hidden state.
        proj_H = self.i2h(batch_H)                          # [b, T, hidden]
        proj_h = self.h2h(prev_hidden[0]).unsqueeze(1)      # [b, 1, hidden]
        energies = self.score(torch.tanh(proj_H + proj_h))  # [b, T, 1]
        alpha = F.softmax(energies, dim=1)

        # Context vector: attention-weighted sum of encoder states.
        context = torch.bmm(alpha.permute(0, 2, 1), batch_H).squeeze(1)  # [b, channels]
        concat_context = torch.cat([context, char_onehots], 1)  # [b, channels + embeddings]
        cur_hidden = self.rnn(concat_context, prev_hidden)
        return cur_hidden, alpha
self.score(torch.tanh(batch_H_proj + prev_hidden_proj)) # batch_size x num_encoder_step * 1 + + alpha = F.softmax(e, dim=1) + context = torch.bmm(alpha.permute(0, 2, 1), batch_H).squeeze(1) # batch_size x num_channel + concat_context = torch.cat([context, char_onehots], 1) # batch_size x (num_channel + num_embedding) + cur_hidden = self.rnn(concat_context, prev_hidden) + return cur_hidden, alpha diff --git a/code/recognization/rcnn.py b/code/recognization/rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..1432f26d91cfe864b5ccd1e8f4f3a9c1515f7eff --- /dev/null +++ b/code/recognization/rcnn.py @@ -0,0 +1,89 @@ +""" +Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 +Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora +GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition +Project Website: https://abdur75648.github.io/UTRNet/ +Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial +4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) +""" + +from torch import nn +import torch.nn.functional as F + +# For Gated RCNN +class GRCL(nn.Module): + + def __init__(self, input_channel, output_channel, num_iteration, kernel_size, pad): + super(GRCL, self).__init__() + self.wgf_u = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=False) + self.wgr_x = nn.Conv2d(output_channel, output_channel, 1, 1, 0, bias=False) + self.wf_u = nn.Conv2d(input_channel, output_channel, kernel_size, 1, pad, bias=False) + self.wr_x = nn.Conv2d(output_channel, output_channel, kernel_size, 1, pad, bias=False) + + self.BN_x_init = nn.BatchNorm2d(output_channel) + + self.num_iteration = num_iteration + self.GRCL = [GRCL_unit(output_channel) for _ in range(num_iteration)] + self.GRCL = nn.Sequential(*self.GRCL) + + def forward(self, input): + """ The input of GRCL is consistant over time t, which is denoted by u(0) + 
thus wgf_u / wf_u is also consistant over time t. + """ + wgf_u = self.wgf_u(input) + wf_u = self.wf_u(input) + x = F.relu(self.BN_x_init(wf_u)) + + for i in range(self.num_iteration): + x = self.GRCL[i](wgf_u, self.wgr_x(x), wf_u, self.wr_x(x)) + + return x + +class GRCL_unit(nn.Module): + + def __init__(self, output_channel): + super(GRCL_unit, self).__init__() + self.BN_gfu = nn.BatchNorm2d(output_channel) + self.BN_grx = nn.BatchNorm2d(output_channel) + self.BN_fu = nn.BatchNorm2d(output_channel) + self.BN_rx = nn.BatchNorm2d(output_channel) + self.BN_Gx = nn.BatchNorm2d(output_channel) + + def forward(self, wgf_u, wgr_x, wf_u, wr_x): + G_first_term = self.BN_gfu(wgf_u) + G_second_term = self.BN_grx(wgr_x) + G = F.sigmoid(G_first_term + G_second_term) + + x_first_term = self.BN_fu(wf_u) + x_second_term = self.BN_Gx(self.BN_rx(wr_x) * G) + x = F.relu(x_first_term + x_second_term) + + return x + +class RCNN(nn.Module): + """ FeatureExtractor of GRCNN (https://papers.nips.cc/paper/6637-gated-recurrent-convolution-neural-network-for-ocr.pdf) """ + + def __init__(self, input_channel=1, output_channel=512): + super(RCNN, self).__init__() + self.output_channel = [int(output_channel / 8), int(output_channel / 4), + int(output_channel / 2), output_channel] # [64, 128, 256, 512] + self.ConvNet = nn.Sequential( + nn.Conv2d(input_channel, self.output_channel[0], 3, 1, 1), nn.ReLU(True), + nn.MaxPool2d(2, 2), # 64 x 16 x 50 + GRCL(self.output_channel[0], self.output_channel[0], num_iteration=5, kernel_size=3, pad=1), + nn.MaxPool2d(2, 2), # 64 x 8 x 25 + GRCL(self.output_channel[0], self.output_channel[1], num_iteration=5, kernel_size=3, pad=1), + nn.MaxPool2d(2, (2, 1), (0, 1)), # 128 x 4 x 26 + GRCL(self.output_channel[1], self.output_channel[2], num_iteration=5, kernel_size=3, pad=1), + nn.MaxPool2d(2, (2, 1), (0, 1)), # 256 x 2 x 27 + nn.Conv2d(self.output_channel[2], self.output_channel[3], 2, 1, 0, bias=False), + nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True)) 
# 512 x 1 x 26 + + def forward(self, input): + return self.ConvNet(input) + +# import torch +# x = torch.randn(1, 1, 32, 400) +# net = RCNN() +# out = net(x) +# print(out.shape) \ No newline at end of file diff --git a/code/recognization/recognization.py b/code/recognization/recognization.py new file mode 100644 index 0000000000000000000000000000000000000000..2869cdbf22c86c9c0cddb402ce77d425baf2bc08 --- /dev/null +++ b/code/recognization/recognization.py @@ -0,0 +1,92 @@ +from .config import Config +from .model import Text_recognization_model +import os +import torch + +from .utils import CTCLabelConverter,Averager + +from PIL import Image +import math +import numpy as np +from .dataset import NormalizePAD +import tempfile + + +import os +import math +import numpy as np +from PIL import Image +import torch + +class TextRecognition: + def __init__(self,model_path='model/recognization_model.pth' , device='cpu' ): + # Initialize configuration + self.opt = Config() + self.opt.device = device + self.model_path = model_path + + # Load characters from UrduGlyphs.txt + current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(current_dir, "words.txt") + with open(file_path, "r", encoding="utf-8") as file: + content = file.readlines() + self.opt.character = ''.join([str(elem).strip('\n') for elem in content]) + " " + + # Initialize converter based on prediction type + if 'CTC' in self.opt.Prediction: + self.converter = CTCLabelConverter(self.opt.character) + else: + self.converter = AttnLabelConverter(self.opt.character) + + # Set the number of classes + self.opt.num_class = len(self.converter.character) + + # Load the model + model_path = os.path.join(current_dir, self.model_path) + self.model = Text_recognization_model(self.opt) + + self.model.load_state_dict(torch.load(model_path, map_location=self.opt.device, weights_only=True)) # Load weights + self.model = self.model.to(self.opt.device) + self.model.eval() # Set model to evaluation mode + + 
def recognize_image(self, image):
    """Recognize the text in a single word image.

    Args:
        image: path to an image file (str), or a NumPy array (RGB HxWx3 or
            grayscale HxW).

    Returns:
        The decoded text string.

    Raises:
        TypeError: if image is neither a path nor a NumPy array.
        ValueError: if a NumPy array has an unsupported shape.
    """
    # --- Convert the input to a grayscale PIL image ---
    if isinstance(image, str):
        pil_image = Image.open(image).convert('L')
    elif isinstance(image, np.ndarray):
        if len(image.shape) == 3 and image.shape[2] == 3:
            # ITU-R BT.601 luma weights for RGB -> grayscale.
            gray_array = np.dot(image[..., :3], [0.2989, 0.5870, 0.1140])
            pil_image = Image.fromarray(gray_array.astype('uint8'))
        elif len(image.shape) == 2:
            pil_image = Image.fromarray(image.astype('uint8'))
        else:
            raise ValueError("Unsupported image format!")
    else:
        raise TypeError("Input must be a file path (str) or a NumPy array.")

    # Mirror horizontally before feeding the model.
    # NOTE(review): presumably matches the right-to-left training data —
    # confirm against the training pipeline.
    pil_image = pil_image.transpose(Image.Transpose.FLIP_LEFT_RIGHT)

    # Resize to the model's input height, preserving aspect ratio but
    # capping at the model's maximum width.
    w, h = pil_image.size
    ratio = w / float(h)
    if math.ceil(self.opt.imgH * ratio) > self.opt.imgW:
        resized_w = self.opt.imgW
    else:
        resized_w = math.ceil(self.opt.imgH * ratio)
    pil_image = pil_image.resize((resized_w, self.opt.imgH), Image.Resampling.BICUBIC)

    # Normalize, pad to fixed width, add the batch dimension.
    transform = NormalizePAD((1, self.opt.imgH, self.opt.imgW))
    img = transform(pil_image).unsqueeze(0).to(self.opt.device)

    # Bug fix: run inference without autograd bookkeeping — the original
    # built a gradient graph on every call for no reason.
    with torch.no_grad():
        preds = self.model(img)

    # Greedy decode of the most likely symbol per time step.
    preds_size = torch.IntTensor([preds.size(1)])
    _, preds_index = preds.max(2)
    return self.converter.decode(preds_index.data, preds_size.data)[0]
"""ResNet feature extractor (FAN-style backbone) for text recognition."""

import torch.nn as nn


class BasicBlock(nn.Module):
    """Two 3x3 convs with BatchNorm and an identity/projection shortcut."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = self._conv3x3(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = self._conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def _conv3x3(self, in_planes, out_planes, stride=1):
        """3x3 convolution with padding and no bias (BatchNorm follows)."""
        return nn.Conv2d(in_planes, out_planes, kernel_size=3,
                         stride=stride, padding=1, bias=False)

    def forward(self, x):
        # Projection shortcut when the shape changes, identity otherwise.
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)


class ResNet_model(nn.Module):
    """Staged ResNet trunk that collapses height while keeping width,
    yielding a wide feature map suitable for sequence decoding."""

    def __init__(self, input_channel, output_channel, block, layers):
        """
        Args:
            input_channel: channels of the input image (1 for grayscale).
            output_channel: channels of the final feature map.
            block: residual block class (e.g. BasicBlock).
            layers: number of blocks per stage, e.g. [1, 2, 5, 3].
        """
        super(ResNet_model, self).__init__()

        self.output_channel_block = [output_channel // 4, output_channel // 2,
                                     output_channel, output_channel]

        self.inplanes = output_channel // 8
        # Stem: two 3x3 convs growing channels to output_channel/8.
        self.conv0_1 = nn.Conv2d(input_channel, output_channel // 16,
                                 kernel_size=3, stride=1, padding=1, bias=False)
        self.bn0_1 = nn.BatchNorm2d(output_channel // 16)
        self.conv0_2 = nn.Conv2d(output_channel // 16, self.inplanes,
                                 kernel_size=3, stride=1, padding=1, bias=False)
        self.bn0_2 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)

        # Stage 1: halve both H and W.
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.layer1 = self._make_layer(block, self.output_channel_block[0], layers[0])
        self.conv1 = nn.Conv2d(self.output_channel_block[0], self.output_channel_block[0],
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.output_channel_block[0])

        # Stage 2: halve both H and W again.
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.layer2 = self._make_layer(block, self.output_channel_block[1], layers[1], stride=1)
        self.conv2 = nn.Conv2d(self.output_channel_block[1], self.output_channel_block[1],
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(self.output_channel_block[1])

        # Stage 3: stride (2, 1) halves H only, preserving horizontal detail.
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=(2, 1), padding=(0, 1))
        self.layer3 = self._make_layer(block, self.output_channel_block[2], layers[2], stride=1)
        self.conv3 = nn.Conv2d(self.output_channel_block[2], self.output_channel_block[2],
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.output_channel_block[2])

        # Stage 4: final blocks plus two convs collapsing height to 1.
        self.layer4 = self._make_layer(block, self.output_channel_block[3], layers[3], stride=1)
        self.conv4_1 = nn.Conv2d(self.output_channel_block[3], self.output_channel_block[3],
                                 kernel_size=2, stride=(2, 1), padding=(0, 1), bias=False)
        self.bn4_1 = nn.BatchNorm2d(self.output_channel_block[3])
        self.conv4_2 = nn.Conv2d(self.output_channel_block[3], self.output_channel_block[3],
                                 kernel_size=2, stride=1, padding=0, bias=False)
        self.bn4_2 = nn.BatchNorm2d(self.output_channel_block[3])

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack `blocks` residual blocks; the first may downsample/project."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        stack = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        stack.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stack)

    def forward(self, x):
        # Stem.
        x = self.relu(self.bn0_1(self.conv0_1(x)))
        x = self.relu(self.bn0_2(self.conv0_2(x)))
        # Stage 1.
        x = self.relu(self.bn1(self.conv1(self.layer1(self.maxpool1(x)))))
        # Stage 2.
        x = self.relu(self.bn2(self.conv2(self.layer2(self.maxpool2(x)))))
        # Stage 3 (height-only pooling).
        x = self.relu(self.bn3(self.conv3(self.layer3(self.maxpool3(x)))))
        # Stage 4: collapse height to 1.
        x = self.relu(self.bn4_1(self.conv4_1(self.layer4(x))))
        return self.relu(self.bn4_2(self.conv4_2(x)))


class ResNet(nn.Module):
    """FeatureExtractor of FAN (http://openaccess.thecvf.com/content_ICCV_2017/papers/Cheng_Focusing_Attention_Towards_ICCV_2017_paper.pdf)"""

    def __init__(self, input_channel=1, output_channel=512):
        super(ResNet, self).__init__()
        self.ConvNet = ResNet_model(input_channel, output_channel, BasicBlock, [1, 2, 5, 3])

    def forward(self, input):
        return self.ConvNet(input)
"""ResUNet feature extractor.

Source: https://github.com/rishikksh20/ResUnet/blob/master/core/res_unet.py
Paper:  https://arxiv.org/pdf/1711.10684.pdf
"""

import torch.nn as nn
import torch


class ResidualConv(nn.Module):
    """Pre-activation double conv with a 3x3 projection skip."""

    def __init__(self, input_dim, output_dim, stride, padding):
        super(ResidualConv, self).__init__()
        # Main branch: BN-ReLU-Conv twice (pre-activation ordering).
        self.conv_block = nn.Sequential(
            nn.BatchNorm2d(input_dim),
            nn.ReLU(),
            nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=stride, padding=padding),
            nn.BatchNorm2d(output_dim),
            nn.ReLU(),
            nn.Conv2d(output_dim, output_dim, kernel_size=3, padding=1),
        )
        # Skip branch: single strided conv + BN to match the main branch shape.
        self.conv_skip = nn.Sequential(
            nn.Conv2d(input_dim, output_dim, kernel_size=3, stride=stride, padding=1),
            nn.BatchNorm2d(output_dim),
        )

    def forward(self, x):
        return self.conv_block(x) + self.conv_skip(x)


class Upsample(nn.Module):
    """Learned upsampling via a transposed convolution."""

    def __init__(self, input_dim, output_dim, kernel, stride):
        super(Upsample, self).__init__()
        self.upsample = nn.ConvTranspose2d(
            input_dim, output_dim, kernel_size=kernel, stride=stride)

    def forward(self, x):
        return self.upsample(x)


class ResUnet(nn.Module):
    """Encoder-bridge-decoder ResUNet producing `out_channel` maps at input resolution."""

    def __init__(self, in_channel=1, out_channel=512, filters=[64, 128, 256, 512]):
        super(ResUnet, self).__init__()

        # Entry: plain double conv plus a conv skip (no pre-activation yet).
        self.input_layer = nn.Sequential(
            nn.Conv2d(in_channel, filters[0], kernel_size=3, padding=1),
            nn.BatchNorm2d(filters[0]),
            nn.ReLU(),
            nn.Conv2d(filters[0], filters[0], kernel_size=3, padding=1),
        )
        self.input_skip = nn.Sequential(
            nn.Conv2d(in_channel, filters[0], kernel_size=3, padding=1)
        )

        # Encoder (stride-2 residual convs).
        self.residual_conv_1 = ResidualConv(filters[0], filters[1], 2, 1)
        self.residual_conv_2 = ResidualConv(filters[1], filters[2], 2, 1)

        self.bridge = ResidualConv(filters[2], filters[3], 2, 1)

        # Decoder: upsample, concat the encoder skip, residual conv.
        self.upsample_1 = Upsample(filters[3], filters[3], 2, 2)
        self.up_residual_conv1 = ResidualConv(filters[3] + filters[2], filters[2], 1, 1)

        self.upsample_2 = Upsample(filters[2], filters[2], 2, 2)
        self.up_residual_conv2 = ResidualConv(filters[2] + filters[1], filters[1], 1, 1)

        self.upsample_3 = Upsample(filters[1], filters[1], 2, 2)
        self.up_residual_conv3 = ResidualConv(filters[1] + filters[0], filters[0], 1, 1)

        self.output_layer = nn.Sequential(
            nn.Conv2d(filters[0], out_channel, 1, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        # Encoder.
        enc1 = self.input_layer(x) + self.input_skip(x)
        enc2 = self.residual_conv_1(enc1)
        enc3 = self.residual_conv_2(enc2)
        # Bridge.
        bottom = self.bridge(enc3)
        # Decoder with skip concatenations.
        up = self.upsample_1(bottom)
        dec = self.up_residual_conv1(torch.cat([up, enc3], dim=1))
        up = self.upsample_2(dec)
        dec = self.up_residual_conv2(torch.cat([up, enc2], dim=1))
        up = self.upsample_3(dec)
        dec = self.up_residual_conv3(torch.cat([up, enc1], dim=1))
        return self.output_layer(dec)
"""Recurrent sequence-modeling heads: (bi)LSTM, GRU, and a stacked MDLSTM."""

import torch.nn as nn


class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM followed by a linear projection."""

    def __init__(self, input_size, hidden_size, output_size):
        super(BidirectionalLSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input):
        """Map visual features [B, T, input_size] to contextual features [B, T, output_size]."""
        self.rnn.flatten_parameters()  # keep weights contiguous (multi-GPU safe)
        contextual, _ = self.rnn(input)  # [B, T, 2*hidden_size]
        return self.linear(contextual)


class LSTM(nn.Module):
    """Unidirectional LSTM followed by a linear projection."""

    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """Map visual features [B, T, input_size] to contextual features [B, T, output_size]."""
        self.rnn.flatten_parameters()
        contextual, _ = self.rnn(input)  # [B, T, hidden_size]
        return self.linear(contextual)


class GRU(nn.Module):
    """Unidirectional GRU followed by a linear projection."""

    def __init__(self, input_size, hidden_size, output_size):
        super(GRU, self).__init__()
        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """Map visual features [B, T, input_size] to contextual features [B, T, output_size]."""
        self.rnn.flatten_parameters()
        contextual, _ = self.rnn(input)  # [B, T, hidden_size]
        return self.linear(contextual)


class MDLSTM(nn.Module):
    """Four stacked LSTM heads that expand then contract the feature width.

    Inspired by HM-LSTM (https://arxiv.org/pdf/1609.01704.pdf): each LSTM
    recursively maps the textline features into a different-dimensional space.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(MDLSTM, self).__init__()
        self.rnn = nn.Sequential(
            LSTM(input_size, hidden_size, 2 * hidden_size),
            LSTM(2 * hidden_size, hidden_size, 4 * hidden_size),
            LSTM(4 * hidden_size, hidden_size, 2 * hidden_size),
            LSTM(2 * hidden_size, hidden_size, hidden_size),
        )
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """Map visual features [B, T, input_size] to contextual features [B, T, output_size]."""
        for layer in self.rnn:
            layer.rnn.flatten_parameters()
        return self.linear(self.rnn(input))
"""CLI smoke test: run TextRecognition on a sample image and print the result."""
from recognization import TextRecognition
import os
import argparse


def main():
    """Parse CLI arguments, recognize the requested image, and print the text."""
    parser = argparse.ArgumentParser(description='Process an image.')
    parser.add_argument('--image_name', type=str, required=False, default='2.png',
                        help='Path to the image file')
    parser.add_argument('--device', type=str, required=False, default='cpu',
                        help='cpu or cuda')
    args = parser.parse_args()

    # Resolve the sample image relative to this script's directory.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, 'data', args.image_name)

    # TextRecognition resolves the weights path relative to its own module
    # directory, so the relative path is passed through unchanged.
    # (Bug fix: the original also computed a `model_path` local that was
    # never used — removed.)
    recognizer = TextRecognition('model/recognization_model.pth', device=args.device)
    print(recognizer.recognize_image(image_path))


# Guard so importing this module no longer triggers argument parsing and
# model loading as a side effect.
if __name__ == '__main__':
    main()
# UNet feature extractor — adapted from https://github.com/milesial/Pytorch-UNet

import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """2x2 max-pool followed by a double conv (one encoder step)."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels),
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Transposed-conv upsample, pad to the skip's size, concat, double conv."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # Pad the upsampled map (CHW) so it matches the skip connection —
        # handles inputs whose sides are not exact powers of two.
        dy = x2.size()[2] - x1.size()[2]
        dx = x2.size()[3] - x1.size()[3]
        x1 = F.pad(x1, [dx // 2, dx - dx // 2,
                        dy // 2, dy - dy // 2])
        return self.conv(torch.cat([x2, x1], dim=1))


class OutConv(nn.Module):
    """1x1 projection to the requested channel count."""

    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    """Four-level UNet producing n_classes feature maps at input resolution."""

    def __init__(self, n_channels=1, n_classes=512):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes

        # Encoder: channels 32 -> 512 while spatial size halves each level.
        self.inc = DoubleConv(n_channels, 32)
        self.down1 = Down(32, 64)
        self.down2 = Down(64, 128)
        self.down3 = Down(128, 256)
        self.down4 = Down(256, 512)
        # Decoder mirrors the encoder with skip fusion.
        self.up1 = Up(512, 256)
        self.up2 = Up(256, 128)
        self.up3 = Up(128, 64)
        self.up4 = Up(64, 32)
        self.outc = OutConv(32, n_classes)

    def forward(self, x):
        # Encoder path; each stage's output is kept as a skip connection.
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        bottom = self.down4(skip4)
        # Decoder path: upsample and fuse with the matching encoder skip.
        y = self.up1(bottom, skip4)
        y = self.up2(y, skip3)
        y = self.up3(y, skip2)
        y = self.up4(y, skip1)
        return self.outc(y)
# Attention UNet.
# Source - https://github.com/sfczekalski/attention_unet
# Article - https://towardsdatascience.com/biomedical-image-segmentation-attention-u-net-29b6f0827405


class ConvBlock(nn.Module):
    """Two 'same' 3x3 convs, each followed by BatchNorm and ReLU."""

    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()
        # in_channels is the filter count of the previous layer,
        # out_channels the filter count of this layer.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)


class UpConv(nn.Module):
    """2x nearest upsample followed by a 3x3 conv, BatchNorm and ReLU."""

    def __init__(self, in_channels, out_channels):
        super(UpConv, self).__init__()
        self.up = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.up(x)


class AttentionBlock(nn.Module):
    """Additive attention gate that re-weights an encoder skip connection.

    Args:
        F_g: feature maps (channels) of the gating signal (previous decoder layer).
        F_l: feature maps of the corresponding encoder skip connection.
        n_coefficients: number of learnable attention coefficients.
    """

    def __init__(self, F_g, F_l, n_coefficients):
        super(AttentionBlock, self).__init__()

        self.W_gate = nn.Sequential(
            nn.Conv2d(F_g, n_coefficients, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(n_coefficients),
        )
        self.W_x = nn.Sequential(
            nn.Conv2d(F_l, n_coefficients, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(n_coefficients),
        )
        self.psi = nn.Sequential(
            nn.Conv2d(n_coefficients, 1, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, gate, skip_connection):
        """Return the skip connection scaled by learned attention weights."""
        attn = self.relu(self.W_gate(gate) + self.W_x(skip_connection))
        return skip_connection * self.psi(attn)


class AttnUNet(nn.Module):
    """UNet whose skip connections are filtered through attention gates."""

    def __init__(self, img_ch=1, output_ch=512):
        super(AttnUNet, self).__init__()

        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder: channels 32 -> 512 over five conv blocks.
        self.Conv1 = ConvBlock(img_ch, 32)
        self.Conv2 = ConvBlock(32, 64)
        self.Conv3 = ConvBlock(64, 128)
        self.Conv4 = ConvBlock(128, 256)
        self.Conv5 = ConvBlock(256, 512)

        # Decoder: each level upsamples, gates the skip, then double-convs.
        self.Up5 = UpConv(512, 256)
        self.Att5 = AttentionBlock(F_g=256, F_l=256, n_coefficients=128)
        self.UpConv5 = ConvBlock(512, 256)

        self.Up4 = UpConv(256, 128)
        self.Att4 = AttentionBlock(F_g=128, F_l=128, n_coefficients=64)
        self.UpConv4 = ConvBlock(256, 128)

        self.Up3 = UpConv(128, 64)
        self.Att3 = AttentionBlock(F_g=64, F_l=64, n_coefficients=32)
        self.UpConv3 = ConvBlock(128, 64)

        self.Up2 = UpConv(64, 32)
        self.Att2 = AttentionBlock(F_g=32, F_l=32, n_coefficients=16)
        self.UpConv2 = ConvBlock(64, 32)

        self.Conv = nn.Conv2d(32, output_ch, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Encoder (enc*) then decoder (dec*) with attention-gated skips."""
        # Encoder path.
        enc1 = self.Conv1(x)
        enc2 = self.Conv2(self.MaxPool(enc1))
        enc3 = self.Conv3(self.MaxPool(enc2))
        enc4 = self.Conv4(self.MaxPool(enc3))
        enc5 = self.Conv5(self.MaxPool(enc4))

        # Decoder path: gate the skip, concatenate, then double conv.
        dec5 = self.Up5(enc5)
        dec5 = self.UpConv5(torch.cat((self.Att5(gate=dec5, skip_connection=enc4), dec5), dim=1))

        dec4 = self.Up4(dec5)
        dec4 = self.UpConv4(torch.cat((self.Att4(gate=dec4, skip_connection=enc3), dec4), dim=1))

        dec3 = self.Up3(dec4)
        dec3 = self.UpConv3(torch.cat((self.Att3(gate=dec3, skip_connection=enc2), dec3), dim=1))

        dec2 = self.Up2(dec3)
        dec2 = self.UpConv2(torch.cat((self.Att2(gate=dec2, skip_connection=enc1), dec2), dim=1))

        return self.Conv(dec2)
class VGGBlock(nn.Module):
    """VGG-style double convolution: (Conv3x3 => BN => ReLU) twice."""

    def __init__(self, in_channels, middle_channels, out_channels):
        super().__init__()
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, middle_channels, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(middle_channels)
        self.conv2 = nn.Conv2d(middle_channels, out_channels, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        y = self.relu(self.bn1(self.conv1(x)))
        return self.relu(self.bn2(self.conv2(y)))


class NestedUNet(nn.Module):
    """UNet++ (nested UNet, https://arxiv.org/abs/1807.10165).

    Node conv{i}_{j} sits at depth i (spatial scale 1/2^i) and dense-skip
    column j; each node receives all earlier columns at its depth plus an
    upsample of the node one level deeper.
    """

    def __init__(self, input_channels=1, out_channels=512):
        super().__init__()

        nb_filter = [32, 64, 128, 256, 512]

        self.pool = nn.MaxPool2d(2, 2)
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Backbone column (j = 0).
        self.conv0_0 = VGGBlock(input_channels, nb_filter[0], nb_filter[0])
        self.conv1_0 = VGGBlock(nb_filter[0], nb_filter[1], nb_filter[1])
        self.conv2_0 = VGGBlock(nb_filter[1], nb_filter[2], nb_filter[2])
        self.conv3_0 = VGGBlock(nb_filter[2], nb_filter[3], nb_filter[3])
        self.conv4_0 = VGGBlock(nb_filter[3], nb_filter[4], nb_filter[4])

        # Nested columns: input width grows with the number of dense skips.
        self.conv0_1 = VGGBlock(nb_filter[0] + nb_filter[1], nb_filter[0], nb_filter[0])
        self.conv1_1 = VGGBlock(nb_filter[1] + nb_filter[2], nb_filter[1], nb_filter[1])
        self.conv2_1 = VGGBlock(nb_filter[2] + nb_filter[3], nb_filter[2], nb_filter[2])
        self.conv3_1 = VGGBlock(nb_filter[3] + nb_filter[4], nb_filter[3], nb_filter[3])

        self.conv0_2 = VGGBlock(nb_filter[0] * 2 + nb_filter[1], nb_filter[0], nb_filter[0])
        self.conv1_2 = VGGBlock(nb_filter[1] * 2 + nb_filter[2], nb_filter[1], nb_filter[1])
        self.conv2_2 = VGGBlock(nb_filter[2] * 2 + nb_filter[3], nb_filter[2], nb_filter[2])

        self.conv0_3 = VGGBlock(nb_filter[0] * 3 + nb_filter[1], nb_filter[0], nb_filter[0])
        self.conv1_3 = VGGBlock(nb_filter[1] * 3 + nb_filter[2], nb_filter[1], nb_filter[1])

        self.conv0_4 = VGGBlock(nb_filter[0] * 4 + nb_filter[1], nb_filter[0], nb_filter[0])

        self.final = nn.Conv2d(nb_filter[0], out_channels, kernel_size=1)

    def forward(self, input):
        pool, up = self.pool, self.up

        x0_0 = self.conv0_0(input)
        x1_0 = self.conv1_0(pool(x0_0))
        x0_1 = self.conv0_1(torch.cat([x0_0, up(x1_0)], 1))

        x2_0 = self.conv2_0(pool(x1_0))
        x1_1 = self.conv1_1(torch.cat([x1_0, up(x2_0)], 1))
        x0_2 = self.conv0_2(torch.cat([x0_0, x0_1, up(x1_1)], 1))

        x3_0 = self.conv3_0(pool(x2_0))
        x2_1 = self.conv2_1(torch.cat([x2_0, up(x3_0)], 1))
        x1_2 = self.conv1_2(torch.cat([x1_0, x1_1, up(x2_1)], 1))
        x0_3 = self.conv0_3(torch.cat([x0_0, x0_1, x0_2, up(x1_2)], 1))

        x4_0 = self.conv4_0(pool(x3_0))
        x3_1 = self.conv3_1(torch.cat([x3_0, up(x4_0)], 1))
        x2_2 = self.conv2_2(torch.cat([x2_0, x2_1, up(x3_1)], 1))
        x1_3 = self.conv1_3(torch.cat([x1_0, x1_1, x1_2, up(x2_2)], 1))
        x0_4 = self.conv0_4(torch.cat([x0_0, x0_1, x0_2, x0_3, up(x1_3)], 1))

        return self.final(x0_4)
class CTCLabelConverter(object):
    """Maps text labels to index tensors for CTC training and back again."""

    def __init__(self, character):
        # `character` is the string of all recognizable symbols.
        symbols = list(character)
        # Index 0 is reserved for the CTC blank token, so real symbols start at 1.
        self.dict = {ch: idx + 1 for idx, ch in enumerate(symbols)}
        self.character = ['[CTCblank]'] + symbols

    def encode(self, text, batch_max_length=25):
        """Convert a batch of strings into a padded index tensor.

        Args:
            text: text labels of each image. [batch_size]
            batch_max_length: max label length in the batch (default 25).

        Returns:
            (LongTensor [batch_size, batch_max_length], IntTensor [batch_size]) —
            padded label indices and true label lengths. The padding index 0
            does not affect the CTC loss.
        """
        lengths = [len(label) for label in text]
        batch_text = torch.LongTensor(len(text), batch_max_length).fill_(0)
        for row, label in enumerate(text):
            indices = [self.dict[ch] for ch in label]
            batch_text[row][:len(indices)] = torch.LongTensor(indices)
        return (batch_text, torch.IntTensor(lengths))

    def decode(self, text_index, length):
        """Greedy CTC decode: drop blanks (index 0) and collapse repeats."""
        texts = []
        for row, l in enumerate(length):
            t = text_index[row, :]
            chars = [self.character[t[i]]
                     for i in range(l)
                     if t[i] != 0 and not (i > 0 and t[i - 1] == t[i])]
            texts.append(''.join(chars))
        return texts
+ dict_character = list(character) + + self.dict = {} + for i, char in enumerate(dict_character): + # NOTE: 0 is reserved for 'CTCblank' token required by CTCLoss + self.dict[char] = i + 1 + + self.character = ['[CTCblank]'] + dict_character # dummy '[CTCblank]' token for CTCLoss (index 0) + + def encode(self, text, batch_max_length=25): + """convert text-label into text-index. + input: + text: text labels of each image. [batch_size] + output: + text: concatenated text index for CTCLoss. + [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] + length: length of each text. [batch_size] + """ + length = [len(s) for s in text] + text = ''.join(text) + text = [self.dict[char] for char in text] + + return (torch.IntTensor(text), torch.IntTensor(length)) + + def decode(self, text_index, length): + """ convert text-index into text-label. """ + texts = [] + index = 0 + for l in length: + t = text_index[index:index + l] + + char_list = [] + for i in range(l): + if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])): # removing repeated characters and blank. + char_list.append(self.character[t[i]]) + text = ''.join(char_list) + + texts.append(text) + index += l + return texts + + +class AttnLabelConverter(object): + """ Convert between text-label and text-index """ + + def __init__(self, character): + # character (str): set of the possible characters. + # [GO] for the start token of the attention decoder. [s] for end-of-sentence token. + list_token = ['[GO]', '[s]'] # ['[s]','[UNK]','[PAD]','[GO]'] + list_character = list(character) + self.character = list_token + list_character + + self.dict = {} + for i, char in enumerate(self.character): + # print(i, char) + self.dict[char] = i + + def encode(self, text, batch_max_length=25): + """ convert text-label into text-index. + input: + text: text labels of each image. [batch_size] + batch_max_length: max length of text label in the batch. 25 by default + + output: + text : the input of attention decoder. 
[batch_size x (max_length+2)] +1 for [GO] token and +1 for [s] token. + text[:, 0] is [GO] token and text is padded with [GO] token after [s] token. + length : the length of output of attention decoder, which count [s] token also. [3, 7, ....] [batch_size] + """ + length = [len(s) + 1 for s in text] # +1 for [s] at end of sentence. + # batch_max_length = max(length) # this is not allowed for multi-gpu setting + batch_max_length += 1 + # additional +1 for [GO] at first step. batch_text is padded with [GO] token after [s] token. + batch_text = torch.LongTensor(len(text), batch_max_length + 1).fill_(0) + for i, t in enumerate(text): + text = list(t) + text.append('[s]') + + try: + text = [self.dict[char] for char in text] + except KeyError as e: + continue + batch_text[i][1:1 + len(text)] = torch.LongTensor(text) # batch_text[:, 0] = [GO] token + return (batch_text, torch.IntTensor(length)) + + def decode(self, text_index, length): + """ convert text-index into text-label. """ + texts = [] + for index, l in enumerate(length): + text = ''.join([self.character[i] for i in text_index[index, :]]) + texts.append(text) + return texts + + +def imshow(img, title,batch_size=1): + std_correction = np.asarray([0.229, 0.224, 0.225]).reshape(3, 1, 1) + mean_correction = np.asarray([0.485, 0.456, 0.406]).reshape(3, 1, 1) + npimg = np.multiply(img.numpy(), std_correction) + mean_correction + plt.figure(figsize = (batch_size * 4, 4)) + plt.axis("off") + plt.imshow(np.transpose(npimg, (1, 2, 0))) + plt.title(title) + plt.show() + + +class Averager(object): + """Compute average for torch.Tensor, used for loss average.""" + + def __init__(self): + self.reset() + + def add(self, v): + count = v.data.numel() + v = v.data.sum() + self.n_count += count + self.sum += v + + def reset(self): + self.n_count = 0 + self.sum = 0 + + def val(self): + res = 0 + if self.n_count != 0: + res = self.sum / float(self.n_count) + return res + +class Logger(object): + """For logging while training""" + def 
__init__(self, path): + self.logFile = path + datetime_now = str(datetime.now(pytz.timezone('Asia/Kolkata')).strftime("%Y-%m-%d_%H-%M-%S")) + with open(self.logFile,"w",encoding="utf-8") as f: + f.write("Logging at @ " + str(datetime_now) + "\n") + + def log(self,*input): + message = "" + for x in input: + message+=str(x) + " " + message = message.strip() + print(message) + with open(self.logFile,"a",encoding="utf-8") as f: + f.write(str(message)+"\n") + + +def allign_two_strings(x:str, y:str, pxy:int=1, pgap:int=1): + """ + Source: https://www.geeksforgeeks.org/sequence-alignment-problem/ + """ + i = 0 + j = 0 + m = len(x) + n = len(y) + dp = np.zeros([m+1,n+1], dtype=int) + dp[0:(m+1),0] = [ i * pgap for i in range(m+1)] + dp[0,0:(n+1)] = [ i * pgap for i in range(n+1)] + + i = 1 + while i <= m: + j = 1 + while j <= n: + if x[i - 1] == y[j - 1]: + dp[i][j] = dp[i - 1][j - 1] + else: + dp[i][j] = min(dp[i - 1][j - 1] + pxy, + dp[i - 1][j] + pgap, + dp[i][j - 1] + pgap) + j += 1 + i += 1 + + l = n + m + i = m + j = n + + xpos = l + ypos = l + + xans = np.zeros(l+1, dtype=int) + yans = np.zeros(l+1, dtype=int) + + while not (i == 0 or j == 0): + #print(f"i: {i}, j: {j}") + if x[i - 1] == y[j - 1]: + xans[xpos] = ord(x[i - 1]) + yans[ypos] = ord(y[j - 1]) + xpos -= 1 + ypos -= 1 + i -= 1 + j -= 1 + elif (dp[i - 1][j - 1] + pxy) == dp[i][j]: + + xans[xpos] = ord(x[i - 1]) + yans[ypos] = ord(y[j - 1]) + xpos -= 1 + ypos -= 1 + i -= 1 + j -= 1 + + elif (dp[i - 1][j] + pgap) == dp[i][j]: + xans[xpos] = ord(x[i - 1]) + yans[ypos] = ord('_') + xpos -= 1 + ypos -= 1 + i -= 1 + + elif (dp[i][j - 1] + pgap) == dp[i][j]: + xans[xpos] = ord('_') + yans[ypos] = ord(y[j - 1]) + xpos -= 1 + ypos -= 1 + j -= 1 + + + while xpos > 0: + if i > 0: + i -= 1 + xans[xpos] = ord(x[i]) + xpos -= 1 + else: + xans[xpos] = ord('_') + xpos -= 1 + + while ypos > 0: + if j > 0: + j -= 1 + yans[ypos] = ord(y[j]) + ypos -= 1 + else: + yans[ypos] = ord('_') + ypos -= 1 + + id = 1 + i = l + while i 
>= 1: + if (chr(yans[i]) == '_') and chr(xans[i]) == '_': + id = i + 1 + break + + i -= 1 + + i = id + x_seq = "" + while i <= l: + x_seq += chr(xans[i]) + i += 1 + + # Y + i = id + y_seq = "" + while i <= l: + y_seq += chr(yans[i]) + i += 1 + + return x_seq, y_seq + +# Function to count the number of trainable parameters in a model in "Millions" +def count_parameters(model,precision=2): + return (round(sum(p.numel() for p in model.parameters() if p.requires_grad) / 10.**6, precision)) + +''' +# Code for counting the number of FLOPs in the CNN backbone during inference +Source - https://github.com/fdbtrs/ElasticFace/blob/main/utils/countFLOPS.py +''' + +def count_model_flops(model,in_channels=1, input_res=[32, 400], multiply_adds=True): + list_conv = [] + + def conv_hook(self, input, output): + batch_size, input_channels, input_height, input_width = input[0].size() + output_channels, output_height, output_width = output[0].size() + + kernel_ops = self.kernel_size[0] * self.kernel_size[1] * (self.in_channels / self.groups) + bias_ops = 1 if self.bias is not None else 0 + + params = output_channels * (kernel_ops + bias_ops) + flops = (kernel_ops * ( + 2 if multiply_adds else 1) + bias_ops) * output_channels * output_height * output_width * batch_size + list_conv.append(flops) + list_linear = [] + + def linear_hook(self, input, output): + batch_size = input[0].size(0) if input[0].dim() == 2 else 1 + + weight_ops = self.weight.nelement() * (2 if multiply_adds else 1) + if self.bias is not None: + bias_ops = self.bias.nelement() if self.bias.nelement() else 0 + flops = batch_size * (weight_ops + bias_ops) + else: + flops = batch_size * weight_ops + list_linear.append(flops) + + list_bn = [] + + def bn_hook(self, input, output): + list_bn.append(input[0].nelement() * 2) + + list_relu = [] + + def relu_hook(self, input, output): + list_relu.append(input[0].nelement()) + + list_pooling = [] + + def pooling_hook(self, input, output): + batch_size, input_channels, 
input_height, input_width = input[0].size() + output_channels, output_height, output_width = output[0].size() + # If kernel_size is a tuple type, computer ops as product of elements or else if it is int type, compute ops as square of kernel_size + kernel_ops = self.kernel_size[0] * self.kernel_size[1] if isinstance(self.kernel_size, tuple) else self.kernel_size * self.kernel_size + bias_ops = 0 + params = 0 + flops = (kernel_ops + bias_ops) * output_channels * output_height * output_width * batch_size + list_pooling.append(flops) + + def dropout_hook(self, input, output): + # calculate the number of operations for a dropout function by assuming that each operation involves one comparison and one multiplication + batch_size, input_channels, input_height, input_width = input[0].size() + list_conv.append(2*batch_size*input_channels*input_height*input_width) + + def sigmoid_hook(self,input,output): + # calculate the number of operations for a sigmoid function by assuming that each operation involves two multiplications and one addition + batch_size, input_channels, input_height, input_width = input[0].size() + list_conv.append(3*batch_size*input_channels*input_height*input_width) + + def upsample_hook(self, input, output): + batch_size, input_channels, input_height, input_width = input[0].size() + output_channels, output_height, output_width = output[0].size() + + kernel_ops = self.scale_factor * self.scale_factor # * (self.in_channels / self.groups) + flops = (kernel_ops * ( + 2 if multiply_adds else 1)) * output_channels * output_height * output_width * batch_size + list_conv.append(flops) + + handles = [] + + def foo(net): + childrens = list(net.children()) + if not childrens: + if isinstance(net, torch.nn.Conv2d) or isinstance(net, torch.nn.ConvTranspose2d): + handles.append(net.register_forward_hook(conv_hook)) + elif isinstance(net, torch.nn.Linear): + handles.append(net.register_forward_hook(linear_hook)) + elif isinstance(net, torch.nn.BatchNorm2d) or 
isinstance(net, torch.nn.BatchNorm1d): + handles.append(net.register_forward_hook(bn_hook)) + elif isinstance(net, torch.nn.ReLU) or isinstance(net, torch.nn.PReLU): + handles.append(net.register_forward_hook(relu_hook)) + elif isinstance(net, torch.nn.MaxPool2d) or isinstance(net, torch.nn.AvgPool2d): + handles.append(net.register_forward_hook(pooling_hook)) + elif isinstance(net, torch.nn.Dropout): + handles.append(net.register_forward_hook(dropout_hook)) + elif isinstance(net,torch.nn.Upsample): + handles.append(net.register_forward_hook(upsample_hook)) + elif isinstance(net,torch.nn.Sigmoid): + handles.append(net.register_forward_hook(sigmoid_hook)) + else: + print("warning" + str(net)) + return + for c in childrens: + foo(c) + + model.eval() + foo(model) + input = Variable(torch.rand(in_channels, input_res[1], input_res[0]).unsqueeze(0), requires_grad=True) + out = model(input) + total_flops = (sum(list_conv) + sum(list_linear) + sum(list_bn) + sum(list_relu) + sum(list_pooling)) + for h in handles: + h.remove() + model.train() + + def flops_to_string(flops, units='MFLOPS', precision=4): + if units == 'GFLOPS': + return str(round(flops / 10.**9, precision)) + ' ' + units + elif units == 'MFLOPS': + return str(round(flops / 10.**6, precision)) + ' ' + units + elif units == 'KFLOPS': + return str(round(flops / 10.**3, precision)) + ' ' + units + else: + return str(flops) + ' FLOPS' + + return flops_to_string(total_flops) + + +def draw_feature_map(visual_feature,vis_dir,num_channel=10): + """draws feature maps for the given visual features + Args: + visual_feature (Tensor): Shape (C, H, W) + vis_dir (String): Directory to save the feature maps + """ + if os.path.exists(vis_dir): + shutil.rmtree(vis_dir) + os.makedirs(vis_dir) + # Save visual_feature from num_channel random channels for visualization + for i in range(num_channel): + random_channel = random.randint(0, visual_feature.shape[1]-1) + visual_feature_for_visualization = visual_feature[0, random_channel, 
:, :].detach().cpu().numpy() + # Horizontal flip + visual_feature_for_visualization = visual_feature_for_visualization[:,::-1] + # Normalize + visual_feature_for_visualization = (visual_feature_for_visualization - visual_feature_for_visualization.min()) / (visual_feature_for_visualization.max() - visual_feature_for_visualization.min()) + # Draw heatmap + plt.imshow(visual_feature_for_visualization, cmap='gray', interpolation='nearest') + plt.axis("off") + plt.savefig(os.path.join(vis_dir, "channel_{}.png".format(random_channel)), bbox_inches='tight', pad_inches=0) \ No newline at end of file diff --git a/code/recognization/vgg.py b/code/recognization/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..bf2f9853f4011bb54471e6ed0c920ec185a5de33 --- /dev/null +++ b/code/recognization/vgg.py @@ -0,0 +1,40 @@ +""" +Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 +Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora +GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition +Project Website: https://abdur75648.github.io/UTRNet/ +Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial +4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) +""" + +from torch import nn + +class VGG(nn.Module): + """ FeatureExtractor of CRNN (https://arxiv.org/pdf/1507.05717.pdf) """ + def __init__(self, input_channel=1, output_channel=512): + super(VGG, self).__init__() + self.output_channel = [int(output_channel / 8), int(output_channel / 4), + int(output_channel / 2), output_channel] + self.ConvNet = nn.Sequential( + nn.Conv2d(input_channel, self.output_channel[0], 3, 1, 1), nn.ReLU(True), + nn.MaxPool2d(2, 2), + nn.Conv2d(self.output_channel[0], self.output_channel[1], 3, 1, 1), nn.ReLU(True), + nn.MaxPool2d(2, 2), + nn.Conv2d(self.output_channel[1], self.output_channel[2], 3, 1, 1), nn.ReLU(True), + 
nn.Conv2d(self.output_channel[2], self.output_channel[2], 3, 1, 1), nn.ReLU(True), + nn.MaxPool2d((2, 1), (2, 1)), + nn.Conv2d(self.output_channel[2], self.output_channel[3], 3, 1, 1, bias=False), + nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True), + nn.Conv2d(self.output_channel[3], self.output_channel[3], 3, 1, 1, bias=False), + nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True), + nn.MaxPool2d((2, 1), (2, 1)), + nn.Conv2d(self.output_channel[3], self.output_channel[3], 2, 1, 0), nn.ReLU(True)) + + def forward(self, input): + return self.ConvNet(input) + +# import torch +# x = torch.randn(1, 1, 32, 400) +# net = VGG() +# out = net(x) +# print(out.shape) \ No newline at end of file diff --git a/code/recognization/words.txt b/code/recognization/words.txt new file mode 100755 index 0000000000000000000000000000000000000000..98f22177c4191052a1bd888519d65c5450212227 --- /dev/null +++ b/code/recognization/words.txt @@ -0,0 +1,180 @@ +ا +آ +ب +ب +ت +ث +ث +ج +ج +ح +خ +د +ڈ +ذ +ر +ڑ +ز +ز +س +ش +ص +ض +ط +ظ +ع +غ +ف +ق +ک +ك +گ +ل +م +ن +ں +و +ه +ھ +ء +ى +ے +ئ +ۓ +ي +ې +ٿ +ڐ +ڙ +أ +ؤ +ۀ +ۃ +ة +ه +ۂ +ﮥ +ٴ +َ +ً +ُ +ِ +ٍ +ْ +ٗ +ٓ +ٰ +ٖ +٘ +ٔ +ّ +ؔ +۰ +١ +٢ +٣ +٣ +٥ +٦ +٧ +٨ +٩ +٪ +% ++ += +٤ +٫ +, +- +_ +٥ +٬ +" +' +/ +\ +> +< +؍ +، +؛ +: +؟ +{ +} +[ +] +( +) +. +‘ +’ +٠ +۔ +“ +” +! 
+* +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/full_information_extraction.py b/full_information_extraction.py new file mode 100644 index 0000000000000000000000000000000000000000..75378e93600dd8f2d1f4241b5094a5f75b198a43 --- /dev/null +++ b/full_information_extraction.py @@ -0,0 +1,45 @@ +from code.detection.recognize_id.detect_and_recognize_id import Recognize_ID +from code.detection.detection import detection +from code.recognization.recognization import TextRecognition +import os +import argparse + +# Define the argument parser +parser = argparse.ArgumentParser(description='Process an image.') +parser.add_argument('--image_name', type=str, required=False, default='id_1.png', help='Path to the image file') +parser.add_argument('--device', type=str, required=False, default='cpu', help='cpu or cuda') + +# Parse the arguments +args = parser.parse_args() + +# Get the image path from the command line +image_name = args.image_name +device = args.device + + +current_dir = os.path.dirname(os.path.abspath(__file__)) + +# Recognize ID +rec_id = Recognize_ID() +image_path = os.path.join(current_dir , 'data' , image_name ) +id = rec_id.give_me_id_number(image_path) + +# Detection +det = detection() +detection_list = det.full_pipeline(image_path,True) + +result = '' +# Loop on all detected images and recognize them +recognizer = TextRecognition(device=device) +for line in detection_list[2:6]: + for word in line: + recognized_word = recognizer.recognize_image(word) + result = result + recognized_word + ' ' + result += '\n' + +# Add Id number +result = result + id + + +print(result) + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ced148bd0d235ba96a3b213023f7b6b6a3bf26c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +torch +numpy +tqdm 
+opencv-python +ultralytics diff --git a/samples/id_1.png b/samples/id_1.png new file mode 100644 index 0000000000000000000000000000000000000000..b4dc92b97fca873a8fd6934610c8e99cc5fcbf8e Binary files /dev/null and b/samples/id_1.png differ diff --git a/samples/id_2.jpg b/samples/id_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3f6ea35ef16162f42e20075243cf012b173c1a82 Binary files /dev/null and b/samples/id_2.jpg differ