from mmcv.cnn import ConvModule
from torch import nn
from torch.utils import checkpoint as cp

from .se_layer import SELayer


class InvertedResidual(nn.Module):
| | """InvertedResidual block for MobileNetV2. |
| | |
| | Args: |
| | in_channels (int): The input channels of the InvertedResidual block. |
| | out_channels (int): The output channels of the InvertedResidual block. |
| | stride (int): Stride of the middle (first) 3x3 convolution. |
| | expand_ratio (int): Adjusts number of channels of the hidden layer |
| | in InvertedResidual by this amount. |
| | dilation (int): Dilation rate of depthwise conv. Default: 1 |
| | conv_cfg (dict): Config dict for convolution layer. |
| | Default: None, which means using conv2d. |
| | norm_cfg (dict): Config dict for normalization layer. |
| | Default: dict(type='BN'). |
| | act_cfg (dict): Config dict for activation layer. |
| | Default: dict(type='ReLU6'). |
| | with_cp (bool): Use checkpoint or not. Using checkpoint will save some |
| | memory while slowing down the training speed. Default: False. |
| | |
| | Returns: |
| | Tensor: The output tensor. |
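
    Example:
        >>> # A minimal usage sketch; the channel counts and the 56x56
        >>> # input size are illustrative, not required values.
        >>> import torch
        >>> block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
        >>> x = torch.randn(1, 32, 56, 56)
        >>> block(x).shape
        torch.Size([1, 32, 56, 56])
        >>> # With stride 2 the residual connection is dropped and the
        >>> # spatial size is halved.
        >>> down = InvertedResidual(32, 64, stride=2, expand_ratio=6)
        >>> down(x).shape
        torch.Size([1, 64, 28, 28])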
| | """ |
| |
|
| | def __init__(self, |
| | in_channels, |
| | out_channels, |
| | stride, |
| | expand_ratio, |
| | dilation=1, |
| | conv_cfg=None, |
| | norm_cfg=dict(type='BN'), |
| | act_cfg=dict(type='ReLU6'), |
| | with_cp=False, |
| | **kwargs): |
| | super().__init__() |
| | self.stride = stride |
| | assert stride in [1, 2], f'stride must in [1, 2]. ' \ |
| | f'But received {stride}.' |
| | self.with_cp = with_cp |
| | self.use_res_connect = self.stride == 1 and in_channels == out_channels |
| | hidden_dim = int(round(in_channels * expand_ratio)) |

        layers = []
        if expand_ratio != 1:
            # Pointwise 1x1 conv expanding in_channels to the hidden width.
            layers.append(
                ConvModule(
                    in_channels=in_channels,
                    out_channels=hidden_dim,
                    kernel_size=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg,
                    **kwargs))
        layers.extend([
            # 3x3 depthwise conv (groups == channels) carrying the stride
            # and dilation.
            ConvModule(
                in_channels=hidden_dim,
                out_channels=hidden_dim,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                groups=hidden_dim,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                **kwargs),
            # Linear 1x1 projection back to out_channels (no activation).
            ConvModule(
                in_channels=hidden_dim,
                out_channels=out_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None,
                **kwargs)
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):

        def _inner_forward(x):
            if self.use_res_connect:
                return x + self.conv(x)
            else:
                return self.conv(x)

        # Optionally recompute activations in the backward pass to save
        # memory at the cost of extra forward compute.
        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out


class InvertedResidualV3(nn.Module):
| | """Inverted Residual Block for MobileNetV3. |
| | |
| | Args: |
| | in_channels (int): The input channels of this Module. |
| | out_channels (int): The output channels of this Module. |
| | mid_channels (int): The input channels of the depthwise convolution. |
| | kernel_size (int): The kernel size of the depthwise convolution. |
| | Default: 3. |
| | stride (int): The stride of the depthwise convolution. Default: 1. |
| | se_cfg (dict): Config dict for se layer. Default: None, which means no |
| | se layer. |
| | with_expand_conv (bool): Use expand conv or not. If set False, |
| | mid_channels must be the same with in_channels. Default: True. |
| | conv_cfg (dict): Config dict for convolution layer. Default: None, |
| | which means using conv2d. |
| | norm_cfg (dict): Config dict for normalization layer. |
| | Default: dict(type='BN'). |
| | act_cfg (dict): Config dict for activation layer. |
| | Default: dict(type='ReLU'). |
| | with_cp (bool): Use checkpoint or not. Using checkpoint will save some |
| | memory while slowing down the training speed. Default: False. |
| | |
| | Returns: |
| | Tensor: The output tensor. |
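
    Example:
        >>> # A minimal usage sketch; channel counts and the 32x32 input
        >>> # size are illustrative. With with_expand_conv=False,
        >>> # mid_channels must equal in_channels.
        >>> import torch
        >>> block = InvertedResidualV3(
        ...     in_channels=16, out_channels=16, mid_channels=16,
        ...     with_expand_conv=False)
        >>> x = torch.randn(1, 16, 32, 32)
        >>> block(x).shape
        torch.Size([1, 16, 32, 32])
        >>> # Stride 2 halves the spatial size (TF-style 'SAME' padding).
        >>> down = InvertedResidualV3(16, 24, mid_channels=64, stride=2)
        >>> down(x).shape
        torch.Size([1, 24, 16, 16])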
| | """ |
| |
|
| | def __init__(self, |
| | in_channels, |
| | out_channels, |
| | mid_channels, |
| | kernel_size=3, |
| | stride=1, |
| | se_cfg=None, |
| | with_expand_conv=True, |
| | conv_cfg=None, |
| | norm_cfg=dict(type='BN'), |
| | act_cfg=dict(type='ReLU'), |
| | with_cp=False): |
| | super().__init__() |
| | self.with_res_shortcut = (stride == 1 and in_channels == out_channels) |
| | assert stride in [1, 2] |
| | self.with_cp = with_cp |
| | self.with_se = se_cfg is not None |
| | self.with_expand_conv = with_expand_conv |
| |
|
| | if self.with_se: |
| | assert isinstance(se_cfg, dict) |
| | if not self.with_expand_conv: |
| | assert mid_channels == in_channels |

        if self.with_expand_conv:
            # Pointwise 1x1 conv expanding in_channels to mid_channels.
            self.expand_conv = ConvModule(
                in_channels=in_channels,
                out_channels=mid_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        # Depthwise conv; for stride 2, Conv2dAdaptivePadding mimics the
        # TensorFlow-style 'SAME' padding used by the original MobileNetV3.
        self.depthwise_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=mid_channels,
            conv_cfg=dict(type='Conv2dAdaptivePadding')
            if stride == 2 else conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        if self.with_se:
            self.se = SELayer(**se_cfg)

        # Linear 1x1 projection back to out_channels (no activation).
        self.linear_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

    def forward(self, x):

        def _inner_forward(x):
            out = x

            if self.with_expand_conv:
                out = self.expand_conv(out)

            out = self.depthwise_conv(out)

            if self.with_se:
                out = self.se(out)

            out = self.linear_conv(out)

            if self.with_res_shortcut:
                return x + out
            else:
                return out

        # Optionally recompute activations in the backward pass to save
        # memory at the cost of extra forward compute.
        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out