File size: 6,740 Bytes
d670799 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmaction.registry import MODELS
from .resnet3d import Bottleneck3d, ResNet3d
class CSNBottleneck3d(Bottleneck3d):
"""Channel-Separated Bottleneck Block.
This module is proposed in
"Video Classification with Channel-Separated Convolutional Networks"
Link: https://arxiv.org/pdf/1711.11248.pdf
Args:
inplanes (int): Number of channels for the input in first conv3d layer.
planes (int): Number of channels produced by some norm/conv3d layers.
bottleneck_mode (str): Determine which ways to factorize a 3D
bottleneck block using channel-separated convolutional networks.
If set to 'ip', it will replace the 3x3x3 conv2 layer with a
1x1x1 traditional convolution and a 3x3x3 depthwise
convolution, i.e., Interaction-preserved channel-separated
bottleneck block.
If set to 'ir', it will replace the 3x3x3 conv2 layer with a
3x3x3 depthwise convolution, which is derived from preserved
bottleneck block by removing the extra 1x1x1 convolution,
i.e., Interaction-reduced channel-separated bottleneck block.
Default: 'ir'.
args (position arguments): Position arguments for Bottleneck.
kwargs (dict, optional): Keyword arguments for Bottleneck.
"""
def __init__(self,
inplanes,
planes,
*args,
bottleneck_mode='ir',
**kwargs):
super(CSNBottleneck3d, self).__init__(inplanes, planes, *args,
**kwargs)
self.bottleneck_mode = bottleneck_mode
conv2 = []
if self.bottleneck_mode == 'ip':
conv2.append(
ConvModule(
planes,
planes,
1,
stride=1,
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=None))
conv2_kernel_size = self.conv2.conv.kernel_size
conv2_stride = self.conv2.conv.stride
conv2_padding = self.conv2.conv.padding
conv2_dilation = self.conv2.conv.dilation
conv2_bias = bool(self.conv2.conv.bias)
self.conv2 = ConvModule(
planes,
planes,
conv2_kernel_size,
stride=conv2_stride,
padding=conv2_padding,
dilation=conv2_dilation,
bias=conv2_bias,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
groups=planes)
conv2.append(self.conv2)
self.conv2 = nn.Sequential(*conv2)
@MODELS.register_module()
class ResNet3dCSN(ResNet3d):
"""ResNet backbone for CSN.
Args:
depth (int): Depth of ResNetCSN, from {18, 34, 50, 101, 152}.
pretrained (str | None): Name of pretrained model.
temporal_strides (tuple[int]):
Temporal strides of residual blocks of each stage.
Default: (1, 2, 2, 2).
conv1_kernel (tuple[int]): Kernel size of the first conv layer.
Default: (3, 7, 7).
conv1_stride_t (int): Temporal stride of the first conv layer.
Default: 1.
pool1_stride_t (int): Temporal stride of the first pooling layer.
Default: 1.
norm_cfg (dict): Config for norm layers. required keys are `type` and
`requires_grad`.
Default: dict(type='BN3d', requires_grad=True, eps=1e-3).
inflate_style (str): `3x1x1` or `3x3x3`. which determines the kernel
sizes and padding strides for conv1 and conv2 in each block.
Default: '3x3x3'.
bottleneck_mode (str): Determine which ways to factorize a 3D
bottleneck block using channel-separated convolutional networks.
If set to 'ip', it will replace the 3x3x3 conv2 layer with a
1x1x1 traditional convolution and a 3x3x3 depthwise
convolution, i.e., Interaction-preserved channel-separated
bottleneck block.
If set to 'ir', it will replace the 3x3x3 conv2 layer with a
3x3x3 depthwise convolution, which is derived from preserved
bottleneck block by removing the extra 1x1x1 convolution,
i.e., Interaction-reduced channel-separated bottleneck block.
Default: 'ip'.
kwargs (dict, optional): Key arguments for "make_res_layer".
"""
def __init__(self,
depth,
pretrained,
temporal_strides=(1, 2, 2, 2),
conv1_kernel=(3, 7, 7),
conv1_stride_t=1,
pool1_stride_t=1,
norm_cfg=dict(type='BN3d', requires_grad=True, eps=1e-3),
inflate_style='3x3x3',
bottleneck_mode='ir',
bn_frozen=False,
**kwargs):
self.arch_settings = {
# 18: (BasicBlock3d, (2, 2, 2, 2)),
# 34: (BasicBlock3d, (3, 4, 6, 3)),
50: (CSNBottleneck3d, (3, 4, 6, 3)),
101: (CSNBottleneck3d, (3, 4, 23, 3)),
152: (CSNBottleneck3d, (3, 8, 36, 3))
}
self.bn_frozen = bn_frozen
if bottleneck_mode not in ['ip', 'ir']:
raise ValueError(f'Bottleneck mode must be "ip" or "ir",'
f'but got {bottleneck_mode}.')
super(ResNet3dCSN, self).__init__(
depth,
pretrained,
temporal_strides=temporal_strides,
conv1_kernel=conv1_kernel,
conv1_stride_t=conv1_stride_t,
pool1_stride_t=pool1_stride_t,
norm_cfg=norm_cfg,
inflate_style=inflate_style,
bottleneck_mode=bottleneck_mode,
**kwargs)
def train(self, mode=True):
"""Set the optimization status when training."""
super(ResNet3d, self).train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, _BatchNorm):
m.eval()
if self.bn_frozen:
for param in m.parameters():
param.requires_grad = False
|