File size: 5,382 Bytes
d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Copyright (c) OpenMMLab. All rights reserved.
from unittest.mock import MagicMock

import torch

from mmaction.registry import MODELS
from mmaction.structures import ActionDataSample
from mmaction.testing import get_recognizer_cfg
from mmaction.utils import register_all_modules


def train_test_step(cfg, input_shape):
    """Exercise ``train_step`` and ``test_step`` of a recognizer.

    Builds the model from ``cfg.model``, runs one optimisation step with a
    mocked optimizer wrapper, then runs inference twice: once with the default
    clip averaging (scores form a probability-like vector) and once with
    ``average_clips`` disabled (per-view scores are kept separate).

    Args:
        cfg: Full recognizer config; ``cfg.model`` must be buildable via
            ``MODELS`` and expose ``cls_head.num_classes``.
        input_shape: Shape of the random integer input tensor, laid out as
            (num_views, C, T, H, W).

    Returns:
        tuple: ``(loss_vars, predictions)`` — the loss dict from
        ``train_step`` and the prediction list from the second ``test_step``.
    """
    model = MODELS.build(cfg.model)
    n_classes = cfg.model.cls_head.num_classes
    batch = {
        'inputs': [torch.randint(0, 256, input_shape)],
        'data_samples': [ActionDataSample().set_gt_label(2)]
    }

    # train_step: the loss dict must be populated and the (mocked)
    # optimizer wrapper must have been stepped exactly once.
    wrapper = MagicMock()
    loss_vars = model.train_step(batch, wrapper)
    for key in ('loss', 'loss_cls'):
        assert key in loss_vars
    wrapper.update_params.assert_called_once()

    # test_step with default clip averaging: one sample whose scores
    # lie in [0, 1].
    with torch.no_grad():
        predictions = model.test_step(batch)
    assert len(predictions) == 1
    scores = predictions[0].pred_score
    assert scores.shape == torch.Size([n_classes])
    assert torch.min(scores) >= 0
    assert torch.max(scores) <= 1

    # With average_clips disabled, each of the ``num_views`` clips keeps
    # its own score vector.
    model.cls_head.average_clips = None
    num_views = 3
    multi_view_shape = (num_views, ) + tuple(input_shape[1:])
    batch['inputs'] = [torch.randint(0, 256, multi_view_shape)]
    with torch.no_grad():
        predictions = model.test_step(batch)
    assert len(predictions) == 1
    scores = predictions[0].pred_score
    assert scores.shape == torch.Size([num_views, n_classes])

    return loss_vars, predictions


def test_i3d():
    """Smoke-test the I3D recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'i3d/i3d_imagenet-pretrained-r50_8xb8-32x2x1-100e_kinetics400-rgb.py')
    # Disable pretrained weights so no checkpoint download is attempted.
    backbone = cfg.model['backbone']
    backbone['pretrained2d'] = False
    backbone['pretrained'] = None
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 8, 64, 64))


def test_r2plus1d():
    """Smoke-test the R(2+1)D recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'r2plus1d/r2plus1d_r34_8xb8-8x8x1-180e_kinetics400-rgb.py')
    # Disable pretrained weights and force plain BN3d for a CPU-only run.
    backbone = cfg.model['backbone']
    backbone['pretrained2d'] = False
    backbone['pretrained'] = None
    backbone['norm_cfg'] = dict(type='BN3d')
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 8, 64, 64))


def test_slowfast():
    """Smoke-test the SlowFast recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'slowfast/slowfast_r50_8xb8-4x16x1-256e_kinetics400-rgb.py')
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 16, 64, 64))


def test_csn():
    """Smoke-test the CSN recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'csn/ircsn_ig65m-pretrained-r152_8xb12-32x2x1-58e_kinetics400-rgb.py')
    # Disable pretrained weights so no checkpoint download is attempted.
    backbone = cfg.model['backbone']
    backbone['pretrained2d'] = False
    backbone['pretrained'] = None
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 8, 64, 64))


def test_timesformer():
    """Smoke-test the TimeSformer recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'timesformer/timesformer_divST_8xb8-8x32x1-15e_kinetics400-rgb.py')
    # Drop pretrained weights and shrink the spatial size to keep it fast.
    backbone = cfg.model['backbone']
    backbone['pretrained'] = None
    backbone['img_size'] = 32
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 8, 32, 32))


def test_c3d():
    """Smoke-test the C3D recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'c3d/c3d_sports1m-pretrained_8xb30-16x1x1-45e_ucf101-rgb.py')
    # Drop pretrained weights; out_dim matches the shrunken 28x28 input.
    backbone = cfg.model['backbone']
    backbone['pretrained'] = None
    backbone['out_dim'] = 512
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 16, 28, 28))


def test_slowonly():
    """Smoke-test the SlowOnly recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'slowonly/slowonly_r50_8xb16-4x16x1-256e_kinetics400-rgb.py')
    # Disable pretrained weights so no checkpoint download is attempted.
    backbone = cfg.model['backbone']
    backbone['pretrained2d'] = False
    backbone['pretrained'] = None
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 4, 32, 32))


def test_tpn_slowonly():
    """Smoke-test TPN-SlowOnly and check the auxiliary-loss accounting."""
    register_all_modules()
    cfg = get_recognizer_cfg('tpn/tpn-slowonly_imagenet-pretrained-r50_'
                             '8xb8-8x8x1-150e_kinetics400-rgb.py')
    # Disable pretrained weights so no checkpoint download is attempted.
    backbone = cfg.model['backbone']
    backbone['pretrained2d'] = False
    backbone['pretrained'] = None
    # (num_views, channels, time, height, width)
    loss_vars, _ = train_test_step(cfg, input_shape=(1, 3, 4, 48, 48))
    # TPN adds an auxiliary head; total loss is the sum of both terms.
    assert 'loss_aux' in loss_vars
    assert loss_vars['loss_cls'] + loss_vars['loss_aux'] == loss_vars['loss']


def test_swin():
    """Smoke-test the Video Swin recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg('swin/swin-tiny-p244-w877_in1k-pre_'
                             '8xb8-amp-32x2x1-30e_kinetics400-rgb.py')
    # Disable pretrained weights so no checkpoint download is attempted.
    backbone = cfg.model['backbone']
    backbone['pretrained2d'] = False
    backbone['pretrained'] = None
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 4, 64, 64))


def test_c2d():
    """Smoke-test the C2D recognizer on a tiny random clip."""
    register_all_modules()
    cfg = get_recognizer_cfg(
        'c2d/c2d_r50-in1k-pre_8xb32-8x8x1-100e_kinetics400-rgb.py')
    # Disable pretrained weights so no checkpoint download is attempted.
    cfg.model['backbone']['pretrained'] = None
    # (num_views, channels, time, height, width)
    train_test_step(cfg, input_shape=(1, 3, 8, 64, 64))