| |
| import torch.nn as nn |
| from mmcv.runner import BaseModule, Sequential |
|
|
| import mmocr.utils as utils |
| from mmocr.models.builder import BACKBONES |
| from mmocr.models.textrecog.layers import BasicBlock |
|
|
|
|
@BACKBONES.register_module()
class ResNet31OCR(BaseModule):
    """Implement ResNet backbone for text recognition, modified from
    `ResNet <https://arxiv.org/pdf/1512.03385.pdf>`_

    The network is a two-convolution stem (``conv1_1``, ``conv1_2``)
    followed by four stages numbered 2 to 5. Each stage applies, in order:
    an optional max pooling, a stack of ``BasicBlock`` modules, and a
    3x3 Conv-BN-ReLU.

    Args:
        base_channels (int): Number of channels of input image tensor.
        layers (list[int]): List of BasicBlock number for each stage.
            Entries 0 to 3 are used for stages 2 to 5 respectively.
        channels (list[int]): List of out_channels of Conv2d layer.
            Only the first six entries are read: two for the stem
            convolutions and one for each of stages 2 to 5.
        out_indices (None | Sequence[int]): Indices of output stages.
            Index 0 selects the output of stage 2, index 3 the output of
            stage 5. If None, only the output of the last stage is
            returned by :meth:`forward`.
        stage4_pool_cfg (dict): Dictionary to construct and configure
            pooling layer in stage 4. It is merged over the defaults
            ``padding=0, ceil_mode=True``, so it may override them.
        last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
        init_cfg (dict or list[dict]): Initialization config forwarded to
            ``BaseModule``. Defaults to Kaiming for ``Conv2d`` and Uniform
            for ``BatchNorm2d``.
    """

    def __init__(self,
                 base_channels=3,
                 layers=[1, 2, 5, 3],
                 channels=[64, 128, 256, 256, 512, 512, 512],
                 out_indices=None,
                 stage4_pool_cfg=dict(kernel_size=(2, 1), stride=(2, 1)),
                 last_stage_pool=False,
                 init_cfg=[
                     dict(type='Kaiming', layer='Conv2d'),
                     dict(type='Uniform', layer='BatchNorm2d')
                 ]):
        super().__init__(init_cfg=init_cfg)
        assert isinstance(base_channels, int)
        assert utils.is_type_list(layers, int)
        assert utils.is_type_list(channels, int)
        assert out_indices is None or isinstance(out_indices, (list, tuple))
        assert isinstance(last_stage_pool, bool)

        self.out_indices = out_indices
        self.last_stage_pool = last_stage_pool

        # NOTE: sub-modules are created in the same order as they are applied
        # in ``forward``; keep this order stable, since it determines the
        # module registration order.

        # Stage 1 (stem): Conv-BN-ReLU, Conv-BN-ReLU.
        self.conv1_1 = nn.Conv2d(
            base_channels, channels[0], kernel_size=3, stride=1, padding=1)
        self.bn1_1 = nn.BatchNorm2d(channels[0])
        self.relu1_1 = nn.ReLU(inplace=True)

        self.conv1_2 = nn.Conv2d(
            channels[0], channels[1], kernel_size=3, stride=1, padding=1)
        self.bn1_2 = nn.BatchNorm2d(channels[1])
        self.relu1_2 = nn.ReLU(inplace=True)

        # Stage 2: max pooling, residual blocks, Conv-BN-ReLU.
        self.pool2 = nn.MaxPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
        self.conv2 = nn.Conv2d(
            channels[2], channels[2], kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(channels[2])
        self.relu2 = nn.ReLU(inplace=True)

        # Stage 3: max pooling, residual blocks, Conv-BN-ReLU.
        self.pool3 = nn.MaxPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
        self.conv3 = nn.Conv2d(
            channels[3], channels[3], kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(channels[3])
        self.relu3 = nn.ReLU(inplace=True)

        # Stage 4: configurable max pooling, residual blocks, Conv-BN-ReLU.
        # The defaults are merged with ``stage4_pool_cfg`` instead of being
        # passed as separate keywords: a config that itself sets ``padding``
        # or ``ceil_mode`` would otherwise raise
        # ``TypeError: got multiple values for keyword argument``.
        pool4_kwargs = dict(padding=0, ceil_mode=True)
        pool4_kwargs.update(stage4_pool_cfg)
        self.pool4 = nn.MaxPool2d(**pool4_kwargs)
        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
        self.conv4 = nn.Conv2d(
            channels[4], channels[4], kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(channels[4])
        self.relu4 = nn.ReLU(inplace=True)

        # Stage 5: optional max pooling, residual blocks, Conv-BN-ReLU.
        # ``pool5`` stays None unless ``last_stage_pool`` is set; ``forward``
        # skips a pooling layer that is None.
        self.pool5 = None
        if self.last_stage_pool:
            self.pool5 = nn.MaxPool2d(
                kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
        self.conv5 = nn.Conv2d(
            channels[5], channels[5], kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(channels[5])
        self.relu5 = nn.ReLU(inplace=True)

    def _make_layer(self, input_channels, output_channels, blocks):
        """Build a ``Sequential`` of ``blocks`` BasicBlock modules.

        Args:
            input_channels (int): Channel count fed to the first block.
            output_channels (int): Channel count produced by every block.
            blocks (int): Number of BasicBlock modules to stack.

        Returns:
            Sequential: The stacked blocks.
        """
        layers = []
        for _ in range(blocks):
            downsample = None
            if input_channels != output_channels:
                # 1x1 Conv-BN handed to the block as ``downsample`` when the
                # channel count changes. Only the first block can hit this,
                # because ``input_channels`` is updated right after it.
                downsample = Sequential(
                    nn.Conv2d(
                        input_channels,
                        output_channels,
                        kernel_size=1,
                        stride=1,
                        bias=False),
                    nn.BatchNorm2d(output_channels),
                )
            layers.append(
                BasicBlock(
                    input_channels, output_channels, downsample=downsample))
            input_channels = output_channels

        return Sequential(*layers)

    def forward(self, x):
        """Run the backbone on ``x``.

        Args:
            x (Tensor): Input fed to ``conv1_1``; its channel count must
                equal ``base_channels``.

        Returns:
            Tensor | tuple[Tensor]: The output of the last stage when
            ``out_indices`` is None, otherwise a tuple with the outputs of
            the stages selected by ``out_indices``.
        """
        # Stem.
        x = self.conv1_1(x)
        x = self.bn1_1(x)
        x = self.relu1_1(x)

        x = self.conv1_2(x)
        x = self.bn1_2(x)
        x = self.relu1_2(x)

        # Stages 2 to 5; ``outs[k]`` holds the output of stage ``k + 2``.
        outs = []
        for i in range(4):
            layer_index = i + 2
            pool_layer = getattr(self, f'pool{layer_index}')
            block_layer = getattr(self, f'block{layer_index}')
            conv_layer = getattr(self, f'conv{layer_index}')
            bn_layer = getattr(self, f'bn{layer_index}')
            relu_layer = getattr(self, f'relu{layer_index}')

            # Only ``pool5`` can be None (when ``last_stage_pool`` is False).
            if pool_layer is not None:
                x = pool_layer(x)
            x = block_layer(x)
            x = conv_layer(x)
            x = bn_layer(x)
            x = relu_layer(x)

            outs.append(x)

        if self.out_indices is not None:
            return tuple(outs[i] for i in self.out_indices)

        return x
|
|