# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch

from mmaction.models import TimeSformer
from mmaction.testing import generate_backbone_demo_inputs


def test_timesformer_backbone():
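    """Test the TimeSformer backbone under its three attention types."""
    # Input layout is (batch, channels, num_frames, height, width).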
    input_shape = (1, 3, 8, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)

    # divided_space_time
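    # Positional args are num_frames=8, img_size=64, patch_size=16; every
    # transformer layer stacks temporal attention, spatial attention and an
    # FFN, each wrapped with its own norm.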
    timesformer = TimeSformer(
        8, 64, 16, embed_dims=768, attention_type='divided_space_time')
    timesformer.init_weights()
    from mmaction.models.common import (DividedSpatialAttentionWithNorm,
                                        DividedTemporalAttentionWithNorm,
                                        FFNWithNorm)
    assert isinstance(timesformer.transformer_layers.layers[0].attentions[0],
                      DividedTemporalAttentionWithNorm)
    # The default depth is 12 layers; index 11 is the last one.
    assert isinstance(timesformer.transformer_layers.layers[11].attentions[1],
                      DividedSpatialAttentionWithNorm)
    assert isinstance(timesformer.transformer_layers.layers[0].ffns[0],
                      FFNWithNorm)
    assert hasattr(timesformer, 'time_embed')
    # num_patches = (img_size // patch_size) ** 2 = (64 // 16) ** 2 = 16
    assert timesformer.patch_embed.num_patches == 16
    # The backbone returns the class token, shaped (batch, embed_dims).
    cls_tokens = timesformer(imgs)
    assert cls_tokens.shape == torch.Size([1, 768])

    # space_only
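    # Attention runs over spatial patches only, so no temporal embedding
    # is registered on the module.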
    timesformer = TimeSformer(
        8, 64, 16, embed_dims=512, num_heads=8, attention_type='space_only')
    timesformer.init_weights()
    assert not hasattr(timesformer, 'time_embed')
    assert timesformer.patch_embed.num_patches == 16
    cls_tokens = timesformer(imgs)
    assert cls_tokens.shape == torch.Size([1, 512])

    # joint_space_time
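    # A single attention is applied jointly over all space-time tokens.
    # Smaller input here: 2 frames and patch_size=8, so num_patches is
    # (64 // 8) ** 2 = 64.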
    input_shape = (1, 3, 2, 64, 64)
    imgs = generate_backbone_demo_inputs(input_shape)
    timesformer = TimeSformer(
        2,
        64,
        8,
        embed_dims=256,
        num_heads=8,
        attention_type='joint_space_time')
    timesformer.init_weights()
    assert hasattr(timesformer, 'time_embed')
    assert timesformer.patch_embed.num_patches == 64
    cls_tokens = timesformer(imgs)
    assert cls_tokens.shape == torch.Size([1, 256])
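
    # Invalid configurations should fail fast at construction time.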
    with pytest.raises(AssertionError):
        # unsupported attention type
        timesformer = TimeSformer(
            8, 64, 16, attention_type='wrong_attention_type')

    with pytest.raises(AssertionError):
        # wrong transformer_layers type
        timesformer = TimeSformer(8, 64, 16, transformer_layers='wrong_type')
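

if __name__ == '__main__':
    # Hypothetical convenience hook, not part of the test suite proper:
    # lets the check run directly with `python`, outside of pytest.
    test_timesformer_backbone()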