| | |
| |
|
| | import logging |
| |
|
| | import torch.nn as nn |
| | from mmcv.cnn import ConvModule, constant_init, kaiming_init |
| | from mmcv.runner import load_checkpoint |
| | from torch.nn.modules.batchnorm import _BatchNorm |
| |
|
| | from ..builder import BACKBONES |
| |
|
| |
|
class ResBlock(nn.Module):
    """The basic residual block used in Darknet.

    Each ResBlock consists of two ConvModules and the input is added to the
    final output. Each ConvModule is composed of Conv, BN, and LeakyReLU.
    In YoloV3 paper, the first convLayer has half of the number of the
    filters as much as the second convLayer. The first convLayer has filter
    size of 1x1 and the second one has the filter size of 3x3.

    Args:
        in_channels (int): The input channels. Must be even.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
    """

    def __init__(self,
                 in_channels,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None):
        super(ResBlock, self).__init__()
        assert in_channels % 2 == 0  # ensure in_channels can be halved
        # Resolve documented defaults here instead of using mutable default
        # arguments (dicts in the signature are shared across all calls).
        if norm_cfg is None:
            norm_cfg = dict(type='BN', requires_grad=True)
        if act_cfg is None:
            act_cfg = dict(type='LeakyReLU', negative_slope=0.1)
        half_in_channels = in_channels // 2

        # Shared conv/norm/act configuration for both ConvModules.
        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # 1x1 bottleneck conv followed by a 3x3 conv restoring the channels.
        self.conv1 = ConvModule(in_channels, half_in_channels, 1, **cfg)
        self.conv2 = ConvModule(
            half_in_channels, in_channels, 3, padding=1, **cfg)

    def forward(self, x):
        """Apply the two convs and add the input as a residual connection."""
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = out + residual

        return out
| |
|
| |
|
@BACKBONES.register_module()
class Darknet(nn.Module):
    """Darknet backbone.

    Args:
        depth (int): Depth of Darknet. Currently only support 53.
        out_indices (Sequence[int]): Output from which stages.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.

    Example:
        >>> from mmdet.models import Darknet
        >>> import torch
        >>> self = Darknet(depth=53)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 416, 416)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        ...
        (1, 256, 52, 52)
        (1, 512, 26, 26)
        (1, 1024, 13, 13)
    """

    # Maps depth -> (ResBlock repeats per stage, (in, out) channels per stage).
    arch_settings = {
        53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                               (512, 1024)))
    }

    def __init__(self,
                 depth=53,
                 out_indices=(3, 4, 5),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 norm_eval=True):
        super(Darknet, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for darknet')
        # Resolve documented defaults here instead of using mutable default
        # arguments (dicts in the signature are shared across all calls).
        if norm_cfg is None:
            norm_cfg = dict(type='BN', requires_grad=True)
        if act_cfg is None:
            act_cfg = dict(type='LeakyReLU', negative_slope=0.1)
        self.depth = depth
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.layers, self.channels = self.arch_settings[depth]

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # Stem conv: 3 -> 32 channels, stride 1 (stage index 0).
        self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)

        # `cr_blocks` records module names in forward order, stem included.
        self.cr_blocks = ['conv1']
        for i, n_layers in enumerate(self.layers):
            layer_name = f'conv_res_block{i + 1}'
            in_c, out_c = self.channels[i]
            self.add_module(
                layer_name,
                self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
            self.cr_blocks.append(layer_name)

        self.norm_eval = norm_eval

    def forward(self, x):
        """Run all stages, collecting the outputs of stages in `out_indices`.

        Args:
            x (Tensor): Input image batch.

        Returns:
            tuple[Tensor]: Feature maps of the requested stages, in order.
        """
        outs = []
        for i, layer_name in enumerate(self.cr_blocks):
            cr_block = getattr(self, layer_name)
            x = cr_block(x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def init_weights(self, pretrained=None):
        """Initialize weights from a checkpoint path or from scratch.

        Args:
            pretrained (str, optional): Path to a pretrained checkpoint.
                If None, convs get Kaiming init and norm layers are set to
                a constant scale of 1.

        Raises:
            TypeError: If `pretrained` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    def _freeze_stages(self):
        """Put the first `frozen_stages` blocks in eval mode and stop grads."""
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                m = getattr(self, self.cr_blocks[i])
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Switch train/eval mode, re-applying stage freezing and norm_eval."""
        super(Darknet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            # Keep BatchNorm running stats frozen while training.
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()

    @staticmethod
    def make_conv_res_block(in_channels,
                            out_channels,
                            res_repeat,
                            conv_cfg=None,
                            norm_cfg=None,
                            act_cfg=None):
        """In Darknet backbone, ConvLayer is usually followed by ResBlock.

        This function will make that. The Conv layers always have 3x3 filters
        with stride=2. The number of the filters in Conv layer is the same as
        the out channels of the ResBlock.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of output channels.
            res_repeat (int): The number of ResBlocks.
            conv_cfg (dict): Config dict for convolution layer. Default: None.
            norm_cfg (dict): Dictionary to construct and config norm layer.
                Default: dict(type='BN', requires_grad=True)
            act_cfg (dict): Config dict for activation layer.
                Default: dict(type='LeakyReLU', negative_slope=0.1).

        Returns:
            nn.Sequential: A stride-2 3x3 ConvModule followed by `res_repeat`
            ResBlocks.
        """
        # Resolve documented defaults here instead of using mutable default
        # arguments (dicts in the signature are shared across all calls).
        if norm_cfg is None:
            norm_cfg = dict(type='BN', requires_grad=True)
        if act_cfg is None:
            act_cfg = dict(type='LeakyReLU', negative_slope=0.1)

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        model = nn.Sequential()
        model.add_module(
            'conv',
            ConvModule(
                in_channels, out_channels, 3, stride=2, padding=1, **cfg))
        for idx in range(res_repeat):
            model.add_module('res{}'.format(idx),
                             ResBlock(out_channels, **cfg))
        return model
| |
|