File size: 6,740 Bytes
d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm

from mmaction.registry import MODELS
from .resnet3d import Bottleneck3d, ResNet3d


class CSNBottleneck3d(Bottleneck3d):
    """Channel-Separated Bottleneck Block.

    This module is proposed in
    "Video Classification with Channel-Separated Convolutional Networks"
    Link: https://arxiv.org/pdf/1711.11248.pdf

    The parent block is built first; its ``conv2`` is then replaced by an
    ``nn.Sequential`` that ends with a depthwise convolution
    (``groups=planes``) reusing the parent ``conv2``'s kernel size, stride,
    padding and dilation.

    Args:
        inplanes (int): Number of channels for the input in first conv3d layer.
        planes (int): Number of channels produced by some norm/conv3d layers.
        bottleneck_mode (str): Determine which ways to factorize a 3D
            bottleneck block using channel-separated convolutional networks.
                If set to 'ip', it will replace the 3x3x3 conv2 layer with a
                1x1x1 traditional convolution and a 3x3x3 depthwise
                convolution, i.e., Interaction-preserved channel-separated
                bottleneck block.
                If set to 'ir', it will replace the 3x3x3 conv2 layer with a
                3x3x3 depthwise convolution, which is derived from preserved
                bottleneck block by removing the extra 1x1x1 convolution,
                i.e., Interaction-reduced channel-separated bottleneck block.
            Any value other than 'ip' is treated like 'ir' by this class.
            Default: 'ir'.
        args (position arguments): Position arguments for Bottleneck.
        kwargs (dict, optional): Keyword arguments for Bottleneck.
    """

    def __init__(self,
                 inplanes,
                 planes,
                 *args,
                 bottleneck_mode='ir',
                 **kwargs):
        super(CSNBottleneck3d, self).__init__(inplanes, planes, *args,
                                              **kwargs)
        self.bottleneck_mode = bottleneck_mode

        conv2 = []
        if self.bottleneck_mode == 'ip':
            # Interaction-preserved variant: a pointwise (kernel size 1)
            # conv + norm without activation goes in front of the
            # depthwise conv.
            conv2.append(
                ConvModule(
                    planes,
                    planes,
                    1,
                    stride=1,
                    bias=False,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=None))

        # Copy the geometry of the conv2 built by the parent class so the
        # depthwise replacement keeps the same output shape.
        parent_conv = self.conv2.conv
        # Test for presence of a bias explicitly: ``bool(tensor)`` raises for
        # tensors with more than one element and, for a single element,
        # reports the element's truthiness rather than whether a bias exists.
        conv2_bias = parent_conv.bias is not None
        depthwise_conv = ConvModule(
            planes,
            planes,
            parent_conv.kernel_size,
            stride=parent_conv.stride,
            padding=parent_conv.padding,
            dilation=parent_conv.dilation,
            bias=conv2_bias,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg,
            groups=planes)  # groups == channels -> depthwise convolution
        conv2.append(depthwise_conv)

        # Overwrite the parent's conv2 with the channel-separated stack.
        self.conv2 = nn.Sequential(*conv2)


@MODELS.register_module()
class ResNet3dCSN(ResNet3d):
    """ResNet backbone for CSN.

    Args:
        depth (int): Depth of ResNetCSN. Only the bottleneck variants
            registered in ``arch_settings`` are available: {50, 101, 152}.
        pretrained (str | None): Name of pretrained model.
        temporal_strides (tuple[int]):
            Temporal strides of residual blocks of each stage.
            Default: (1, 2, 2, 2).
        conv1_kernel (tuple[int]): Kernel size of the first conv layer.
            Default: (3, 7, 7).
        conv1_stride_t (int): Temporal stride of the first conv layer.
            Default: 1.
        pool1_stride_t (int): Temporal stride of the first pooling layer.
            Default: 1.
        norm_cfg (dict): Config for norm layers. required keys are `type` and
            `requires_grad`.
            Default: dict(type='BN3d', requires_grad=True, eps=1e-3).
        inflate_style (str): `3x1x1` or `3x3x3`. which determines the kernel
            sizes and padding strides for conv1 and conv2 in each block.
            Default: '3x3x3'.
        bottleneck_mode (str): Determine which ways to factorize a 3D
            bottleneck block using channel-separated convolutional networks.
                If set to 'ip', it will replace the 3x3x3 conv2 layer with a
                1x1x1 traditional convolution and a 3x3x3 depthwise
                convolution, i.e., Interaction-preserved channel-separated
                bottleneck block.
                If set to 'ir', it will replace the 3x3x3 conv2 layer with a
                3x3x3 depthwise convolution, which is derived from preserved
                bottleneck block by removing the extra 1x1x1 convolution,
                i.e., Interaction-reduced channel-separated bottleneck block.
            Default: 'ir'.
        bn_frozen (bool): If True, :meth:`train` also sets
            ``requires_grad = False`` on the parameters of every BatchNorm
            layer it switches to eval mode (i.e. only when training with
            ``norm_eval`` enabled). Default: False.
        kwargs (dict, optional): Extra keyword arguments forwarded to the
            ``ResNet3d`` constructor.

    Raises:
        ValueError: If ``bottleneck_mode`` is neither 'ip' nor 'ir'.
    """

    def __init__(self,
                 depth,
                 pretrained,
                 temporal_strides=(1, 2, 2, 2),
                 conv1_kernel=(3, 7, 7),
                 conv1_stride_t=1,
                 pool1_stride_t=1,
                 norm_cfg=dict(type='BN3d', requires_grad=True, eps=1e-3),
                 inflate_style='3x3x3',
                 bottleneck_mode='ir',
                 bn_frozen=False,
                 **kwargs):
        # Assigned before ``super().__init__`` -- presumably the parent
        # constructor looks up ``self.arch_settings[depth]``; verify against
        # ResNet3d before reordering.
        self.arch_settings = {
            # 18: (BasicBlock3d, (2, 2, 2, 2)),
            # 34: (BasicBlock3d, (3, 4, 6, 3)),
            50: (CSNBottleneck3d, (3, 4, 6, 3)),
            101: (CSNBottleneck3d, (3, 4, 23, 3)),
            152: (CSNBottleneck3d, (3, 8, 36, 3))
        }
        self.bn_frozen = bn_frozen
        if bottleneck_mode not in ('ip', 'ir'):
            raise ValueError('Bottleneck mode must be "ip" or "ir", '
                             f'but got {bottleneck_mode}.')
        super(ResNet3dCSN, self).__init__(
            depth,
            pretrained,
            temporal_strides=temporal_strides,
            conv1_kernel=conv1_kernel,
            conv1_stride_t=conv1_stride_t,
            pool1_stride_t=pool1_stride_t,
            norm_cfg=norm_cfg,
            inflate_style=inflate_style,
            bottleneck_mode=bottleneck_mode,
            **kwargs)

    def train(self, mode=True):
        """Set the optimization status when training.

        Args:
            mode (bool): Whether to switch to training mode (True) or
                evaluation mode (False). Default: True.

        Returns:
            ResNet3dCSN: ``self``, so that ``model = model.train()`` and the
            inherited ``eval()`` (which returns ``self.train(False)``) keep
            yielding the module.
        """
        # ``super(ResNet3d, self)`` starts the method lookup *after* ResNet3d,
        # so ResNet3d.train is skipped on purpose; the stage freezing and
        # norm handling are performed below instead.
        super(ResNet3d, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    # Keep BatchNorm in eval mode even while training.
                    m.eval()
                    if self.bn_frozen:
                        # Additionally stop gradient updates of the BN
                        # parameters.
                        for param in m.parameters():
                            param.requires_grad = False
        return self