|
|
""" |
|
|
Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 |
|
|
Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora |
|
|
GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition |
|
|
Project Website: https://abdur75648.github.io/UTRNet/ |
|
|
Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial |
|
|
4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) |
|
|
""" |
|
|
|
|
|
from torch import nn |
|
|
import torch.nn.functional as F |
|
|
|
|
|
|
|
|
class GRCL(nn.Module):
    """Gated recurrent convolution layer.

    The input is projected once by a feed-forward convolution (``wf_u``) and
    by a 1x1 gate convolution (``wgf_u``). The state ``x`` is then refined
    step by step: every step owns a separate ``GRCL_unit`` (its own
    batch-norm layers), while the recurrent convolutions ``wr_x`` and
    ``wgr_x`` are shared by all steps.

    Args:
        input_channel: Number of channels of the incoming feature map.
        output_channel: Number of channels of the state and of the output.
        num_iteration: Number of recurrent refinement steps (one
            ``GRCL_unit`` is created per step).
        kernel_size: Kernel size of the ``wf_u`` / ``wr_x`` convolutions.
        pad: Padding of the ``wf_u`` / ``wr_x`` convolutions.
    """

    def __init__(self, input_channel, output_channel, num_iteration, kernel_size, pad):
        super(GRCL, self).__init__()
        # Creation order is kept unchanged so that seeded weight
        # initialization and the state_dict ordering stay the same.
        self.wgf_u = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=False)
        self.wgr_x = nn.Conv2d(output_channel, output_channel, 1, 1, 0, bias=False)
        self.wf_u = nn.Conv2d(input_channel, output_channel, kernel_size, 1, pad, bias=False)
        self.wr_x = nn.Conv2d(output_channel, output_channel, kernel_size, 1, pad, bias=False)

        self.BN_x_init = nn.BatchNorm2d(output_channel)

        self.num_iteration = num_iteration
        # The units take four tensors each and are never chained, so they are
        # stored in a ModuleList rather than a Sequential. Sub-module names
        # remain "GRCL.0", "GRCL.1", ... and existing checkpoints still load.
        self.GRCL = nn.ModuleList(
            [GRCL_unit(output_channel) for _ in range(num_iteration)]
        )

    def forward(self, input):
        """Run all recurrent refinement steps and return the final state.

        The input of GRCL is consistent over time t, which is denoted by u(0),
        thus wgf_u / wf_u is also consistent over time t and is computed once.
        """
        wgf_u = self.wgf_u(input)
        wf_u = self.wf_u(input)
        x = F.relu(self.BN_x_init(wf_u))

        # Only the recurrent terms depend on the current state x, so they are
        # the only convolutions re-evaluated at every step.
        for unit in self.GRCL:
            x = unit(wgf_u, self.wgr_x(x), wf_u, self.wr_x(x))

        return x
|
|
|
|
|
class GRCL_unit(nn.Module):
    """One refinement step of a gated recurrent convolution layer.

    The unit holds five batch-norm layers and no convolutions: the caller
    passes in the already-convolved feed-forward and recurrent tensors.
    """

    def __init__(self, output_channel):
        super().__init__()
        # Registered in this exact order so state_dict ordering is unchanged.
        for bn_name in ("BN_gfu", "BN_grx", "BN_fu", "BN_rx", "BN_Gx"):
            setattr(self, bn_name, nn.BatchNorm2d(output_channel))

    def forward(self, wgf_u, wgr_x, wf_u, wr_x):
        """Combine the feed-forward and gated recurrent terms into a new state.

        Args:
            wgf_u: Gate convolution of the layer input.
            wgr_x: Gate convolution of the current state.
            wf_u: Feed-forward convolution of the layer input.
            wr_x: Recurrent convolution of the current state.

        Returns:
            ``relu(BN(wf_u) + BN(BN(wr_x) * G))`` where
            ``G = sigmoid(BN(wgf_u) + BN(wgr_x))``.
        """
        # Gate: decides how much of the recurrent signal is let through.
        gate = F.sigmoid(self.BN_gfu(wgf_u) + self.BN_grx(wgr_x))

        feed_forward = self.BN_fu(wf_u)
        gated_recurrent = self.BN_Gx(self.BN_rx(wr_x) * gate)

        return F.relu(feed_forward + gated_recurrent)
|
|
|
|
|
class RCNN(nn.Module):
    """ FeatureExtractor of GRCNN (https://papers.nips.cc/paper/6637-gated-recurrent-convolution-neural-network-for-ocr.pdf) """

    def __init__(self, input_channel=1, output_channel=512):
        super().__init__()
        # Stage widths: 1/8, 1/4 and 1/2 of the final width, then the final width itself.
        self.output_channel = [int(output_channel / divisor) for divisor in (8, 4, 2)]
        self.output_channel.append(output_channel)
        c1, c2, c3, c4 = self.output_channel

        # Layer order is significant: it fixes the "ConvNet.<idx>" parameter names.
        layers = [
            nn.Conv2d(input_channel, c1, 3, 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),
            GRCL(c1, c1, num_iteration=5, kernel_size=3, pad=1),
            nn.MaxPool2d(2, 2),
            GRCL(c1, c2, num_iteration=5, kernel_size=3, pad=1),
            # Stride (2, 1) with padding (0, 1): stride 2 on the first
            # spatial axis, stride 1 on the second.
            nn.MaxPool2d(2, (2, 1), (0, 1)),
            GRCL(c2, c3, num_iteration=5, kernel_size=3, pad=1),
            nn.MaxPool2d(2, (2, 1), (0, 1)),
            nn.Conv2d(c3, c4, 2, 1, 0, bias=False),
            nn.BatchNorm2d(c4),
            nn.ReLU(True),
        ]
        self.ConvNet = nn.Sequential(*layers)

    def forward(self, input):
        """Return the feature map produced by the convolutional stack."""
        features = self.ConvNet(input)
        return features
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|