|
|
| import os.path as osp
|
| from typing import Callable, Dict, List, Optional, Union
|
|
|
| from mmengine.utils import check_file_exist
|
|
|
| from mmaction.registry import DATASETS
|
| from .base import BaseActionDataset
|
|
|
|
|
@DATASETS.register_module()
class AudioDataset(BaseActionDataset):
    """Audio dataset for action recognition.

    The ann_file is a text file with multiple lines, and each line indicates
    a sample audio or extracted audio feature with the filepath, total frames
    of the raw video and label, which are split with a whitespace.
    Blank lines in the annotation file are ignored.
    Example of an annotation file:

    .. code-block:: txt

        some/directory-1.npy 163 1
        some/directory-2.npy 122 1
        some/directory-3.npy 258 2
        some/directory-4.npy 234 2
        some/directory-5.npy 295 3
        some/directory-6.npy 121 3

    Args:
        ann_file (str): Path to the annotation file.
        pipeline (list[dict | callable]): A sequence of data transforms.
        data_prefix (dict): Path to a directory where
            audios are held. Defaults to ``dict(audio='')``.
        multi_class (bool): Determines whether it is a multi-class
            recognition dataset. When True, every annotation line may carry
            several labels and ``num_classes`` must be given.
            Defaults to False.
        num_classes (int, optional): Number of classes in the dataset.
            Defaults to None.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            parent class constructor.
    """

    def __init__(self,
                 ann_file: str,
                 pipeline: List[Union[Dict, Callable]],
                 data_prefix: Dict = dict(audio=''),
                 multi_class: bool = False,
                 num_classes: Optional[int] = None,
                 **kwargs) -> None:
        # The modality is fixed to 'Audio' for this dataset; everything else
        # is passed straight through to the parent class.
        super().__init__(
            ann_file,
            pipeline,
            data_prefix=data_prefix,
            multi_class=multi_class,
            num_classes=num_classes,
            modality='Audio',
            **kwargs)

    def load_data_list(self) -> List[Dict]:
        """Load annotation file to get audio information.

        Each non-blank line is parsed as
        ``<filepath> <total_frames> <label> [<label> ...]``.

        Returns:
            list[dict]: One dict per annotation line with the keys
            ``audio_path`` (str, joined with ``data_prefix['audio']`` when
            that prefix is not None), ``total_frames`` (int) and ``label``
            (a list of int when ``multi_class`` is True, otherwise a single
            int).

        Raises:
            AssertionError: If a line has no label, if ``multi_class`` is
                True but ``num_classes`` is None, or if ``multi_class`` is
                False and a line carries more than one label.
            IndexError: If a non-blank line has no ``total_frames`` field.
            ValueError: If ``total_frames`` or a label is not an integer.
        """
        check_file_exist(self.ann_file)
        data_list = []
        with open(self.ann_file, 'r') as fin:
            for line in fin:
                line_split = line.strip().split()
                if not line_split:
                    # Blank or whitespace-only lines (e.g. a trailing
                    # newline at the end of the file) describe no sample;
                    # skip them instead of failing on the index below.
                    continue

                video_info = {}

                # Field 0: path of the audio / feature file.
                filename = line_split[0]
                if self.data_prefix['audio'] is not None:
                    filename = osp.join(self.data_prefix['audio'], filename)
                video_info['audio_path'] = filename

                # Field 1: total number of frames of the raw video.
                video_info['total_frames'] = int(line_split[1])

                # Remaining fields: one or more integer labels.
                label = [int(x) for x in line_split[2:]]
                assert label, f'missing label in line: {line}'
                if self.multi_class:
                    assert self.num_classes is not None, (
                        'num_classes must be set when multi_class is True')
                    video_info['label'] = label
                else:
                    assert len(label) == 1, (
                        f'expected exactly one label in line: {line}')
                    video_info['label'] = label[0]
                data_list.append(video_info)

        return data_list
|
|
|