File size: 5,750 Bytes
d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Callable, List, Optional, Union

from mmengine.fileio import exists, list_from_file

from mmaction.registry import DATASETS
from mmaction.utils import ConfigType
from .base import BaseActionDataset


@DATASETS.register_module()
class RawframeDataset(BaseActionDataset):
    """Rawframe dataset for action recognition.

    The dataset loads raw frames and applies specified transforms to return a
    dict containing the frame tensors and other information.

    The ann_file is a text file with multiple lines, and each line indicates
    the directory to frames of a video, total frames of the video and
    the label of a video, which are split with a whitespace.
    Blank lines in the annotation file are ignored.
    Example of an annotation file:

    .. code-block:: txt

        some/directory-1 163 1
        some/directory-2 122 1
        some/directory-3 258 2
        some/directory-4 234 2
        some/directory-5 295 3
        some/directory-6 121 3

    Example of a multi-class annotation file:

    .. code-block:: txt

        some/directory-1 163 1 3 5
        some/directory-2 122 1 2
        some/directory-3 258 2
        some/directory-4 234 2 4 6 8
        some/directory-5 295 3
        some/directory-6 121 3

    Example of a with_offset annotation file (clips from long videos), each
    line indicates the directory to frames of a video, the index of the start
    frame, total frames of the video clip and the label of a video clip, which
    are split with a whitespace.

    .. code-block:: txt

        some/directory-1 12 163 3
        some/directory-2 213 122 4
        some/directory-3 100 258 5
        some/directory-4 98 234 2
        some/directory-5 0 295 3
        some/directory-6 50 121 3

    Args:
        ann_file (str): Path to the annotation file.
        pipeline (List[Union[dict, ConfigDict, Callable]]): A sequence of
            data transforms.
        data_prefix (dict or ConfigDict): Path to a directory where video
            frames are held. Defaults to ``dict(img='')``.
        filename_tmpl (str): Template for each filename.
            Defaults to ``img_{:05}.jpg``.
        with_offset (bool): Determines whether the offset information is in
            ann_file. Defaults to False.
        multi_class (bool): Determines whether it is a multi-class
            recognition dataset. Defaults to False.
        num_classes (int, optional): Number of classes in the dataset.
            Defaults to None.
        start_index (int): Specify a start index for frames in consideration of
            different filename format. However, when taking frames as input,
            it should be set to 1, since raw frames count from 1.
            Defaults to 1.
        modality (str): Modality of data. Support ``RGB``, ``Flow``.
            Defaults to ``RGB``.
        test_mode (bool): Store True when building test or validation dataset.
            Defaults to False.
    """

    def __init__(self,
                 ann_file: str,
                 pipeline: List[Union[ConfigType, Callable]],
                 data_prefix: ConfigType = dict(img=''),
                 filename_tmpl: str = 'img_{:05}.jpg',
                 with_offset: bool = False,
                 multi_class: bool = False,
                 num_classes: Optional[int] = None,
                 start_index: int = 1,
                 modality: str = 'RGB',
                 test_mode: bool = False,
                 **kwargs) -> None:
        # These two attributes are assigned before delegating to the parent
        # constructor; ``load_data_list`` reads ``with_offset``.
        # NOTE(review): presumably the base class may load annotations
        # during ``__init__`` -- confirm before reordering.
        self.filename_tmpl = filename_tmpl
        self.with_offset = with_offset
        super().__init__(
            ann_file,
            pipeline=pipeline,
            data_prefix=data_prefix,
            test_mode=test_mode,
            multi_class=multi_class,
            num_classes=num_classes,
            start_index=start_index,
            modality=modality,
            **kwargs)

    def load_data_list(self) -> List[dict]:
        """Load annotation file to get video information.

        Each non-blank line is split on whitespace and parsed as
        ``frame_dir [offset] total_frames [label ...]`` (``offset`` is only
        expected when ``self.with_offset`` is True).

        Returns:
            List[dict]: One dict per annotation line with the keys
            ``frame_dir``, ``total_frames``, ``label`` and, when
            ``with_offset`` is set, ``offset``. ``label`` is a list of ints
            in multi-class mode and a single int otherwise; lines without
            any label get the placeholder label ``-1``.

        Raises:
            FileNotFoundError: If ``self.ann_file`` does not exist.
            AssertionError: If ``multi_class`` is set without
                ``num_classes``, or if a line carries more than one label in
                single-label mode.
        """
        # The result of ``exists`` must be checked explicitly; calling it and
        # discarding the return value validates nothing.
        if not exists(self.ann_file):
            raise FileNotFoundError(
                f'Annotation file not found: {self.ann_file}')
        data_list = []
        fin = list_from_file(self.ann_file)
        for line in fin:
            line_split = line.strip().split()
            # Skip blank / whitespace-only lines (e.g. a trailing empty
            # line) instead of failing with an opaque IndexError below.
            if not line_split:
                continue
            video_info = {}
            idx = 0
            # idx for frame_dir
            frame_dir = line_split[idx]
            if self.data_prefix['img'] is not None:
                frame_dir = osp.join(self.data_prefix['img'], frame_dir)
            video_info['frame_dir'] = frame_dir
            idx += 1
            if self.with_offset:
                # idx for offset and total_frames
                video_info['offset'] = int(line_split[idx])
                video_info['total_frames'] = int(line_split[idx + 1])
                idx += 2
            else:
                # idx for total_frames
                video_info['total_frames'] = int(line_split[idx])
                idx += 1
            # idx for label[s]
            label = [int(x) for x in line_split[idx:]]
            # add fake label for inference datalist without label
            if not label:
                label = [-1]
            if self.multi_class:
                assert self.num_classes is not None, (
                    '`num_classes` must be set when `multi_class` is True')
                video_info['label'] = label
            else:
                assert len(label) == 1, (
                    f'expected exactly one label per line in single-label '
                    f'mode, got {len(label)} in line: {line!r}')
                video_info['label'] = label[0]
            data_list.append(video_info)

        return data_list

    def get_data_info(self, idx: int) -> dict:
        """Get annotation by index.

        Args:
            idx (int): Index of the sample.

        Returns:
            dict: The info dict returned by the parent class, with
            ``filename_tmpl`` added to it.
        """
        data_info = super().get_data_info(idx)
        data_info['filename_tmpl'] = self.filename_tmpl
        return data_info