# Copyright (c) OpenMMLab. All rights reserved.
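"""Check whether every video in a dataset split can be decoded.

Example invocation (the script and config paths are illustrative and should
be adapted to your own setup):

    python tools/analysis_tools/check_videos.py CONFIG_FILE --split train
        --decoder decord --output-file invalid-video.txt
"""
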
import argparse
import os
import warnings
from functools import partial
from multiprocessing import Manager, cpu_count

import numpy as np
from mmengine import Config, DictAction, track_parallel_progress
from mmengine.registry import init_default_scope

from mmaction.registry import DATASETS, TRANSFORMS


def parse_args():
    parser = argparse.ArgumentParser(description='MMAction2 check datasets')
    parser.add_argument('config', help='test config file path')
    parser.add_argument(
        '--options',
        nargs='+',
        action=DictAction,
        default={},
        help='custom options in xxx=yyy format (deprecated), '
        'use --cfg-options instead')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        default={},
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into the config file. For example, '
        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
    parser.add_argument(
        '--output-file',
        default='invalid-video.txt',
        help='Output file path that keeps corrupted/missing video file paths')
    parser.add_argument(
        '--split',
        default='train',
        choices=['train', 'val', 'test'],
        help='Dataset split')
    parser.add_argument(
        '--decoder',
        default='decord',
        choices=['decord', 'opencv', 'pyav'],
        help='Video decoder type, should be one of [decord, opencv, pyav]')
    parser.add_argument(
        '--nproc',
        type=int,
        default=(cpu_count() - 1 or 1),
        help='Number of processes to check videos')
    parser.add_argument(
        '--remove-corrupted-videos',
        action='store_true',
        help='Whether to delete all corrupted videos')
    args = parser.parse_args()
    if args.options and args.cfg_options:
        raise ValueError(
            '--options and --cfg-options cannot both be specified, '
            '--options is deprecated in favor of --cfg-options')
    if args.options:
        warnings.warn('--options is deprecated in favor of --cfg-options')
        args.cfg_options = args.options
    return args


@TRANSFORMS.register_module()
class RandomSampleFrames:

    def __call__(self, results):
        """Select frames to verify.

        Selects the first, the last and three random frames. Required key is
        "total_frames"; added or modified key is "frame_inds".

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in the pipeline.
        """
        assert results['total_frames'] > 0
        # first and last frames
        results['frame_inds'] = np.array([0, results['total_frames'] - 1])
        # choose 3 random frames
        if results['total_frames'] > 2:
            results['frame_inds'] = np.concatenate([
                results['frame_inds'],
                np.random.randint(1, results['total_frames'] - 1, 3)
            ])
        return results
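
# Illustrative behaviour of `RandomSampleFrames` (values are hypothetical):
# for a clip with total_frames=100, `frame_inds` becomes something like
# [0, 99, 42, 7, 63], i.e. the first frame, the last frame and three frames
# drawn at random from the interior.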


def _do_check_videos(lock, pipeline, output_file, data_info):
    try:
        pipeline(data_info)
    except Exception:
        # save invalid video path to output file
        with lock:
            with open(output_file, 'a') as f:
                f.write(data_info['filename'] + '\n')


if __name__ == '__main__':
    args = parse_args()

    decoder_to_pipeline_prefix = dict(
        decord='Decord', opencv='OpenCV', pyav='PyAV')

    # read config file
    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)
    init_default_scope(cfg.get('default_scope', 'mmaction'))

    # build dataset
    dataset_cfg = cfg.get(f'{args.split}_dataloader').dataset
    dataset_type = dataset_cfg.type
    assert dataset_type == 'VideoDataset'
    dataset_cfg.pipeline = [
        dict(type=decoder_to_pipeline_prefix[args.decoder] + 'Init'),
        dict(type='RandomSampleFrames'),
        dict(type=decoder_to_pipeline_prefix[args.decoder] + 'Decode')
    ]
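    # With the default `--decoder decord`, for example, the pipeline above
    # resolves to [DecordInit, RandomSampleFrames, DecordDecode]: decoding a
    # few sampled frames per video is enough to surface unreadable files.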
    dataset = DATASETS.build(dataset_cfg)
    pipeline = dataset.pipeline

    # prepare for checking
    if os.path.exists(args.output_file):
        # remove the existing output file
        os.remove(args.output_file)
    lock = Manager().Lock()
    worker_fn = partial(_do_check_videos, lock, pipeline, args.output_file)
    # materialize the data infos up front to avoid copying the whole dataset
    # into every worker process
    data_info_list = [
        dataset.get_data_info(idx) for idx in range(len(dataset))
    ]

    # start checking
    track_parallel_progress(worker_fn, data_info_list, nproc=args.nproc)

    if os.path.exists(args.output_file):
        with open(args.output_file) as f:
            num_lines = sum(1 for _ in f)
        print(f'Checked {len(dataset)} videos, '
              f'{num_lines} are corrupted/missing.')
        if args.remove_corrupted_videos:
            print('Start deleting corrupted videos')
            cnt = 0
            with open(args.output_file, 'r') as f:
                for line in f:
                    if os.path.exists(line.strip()):
                        os.remove(line.strip())
                        cnt += 1
            print(f'Deleted {cnt} corrupted videos.')
    else:
        print(f'Checked {len(dataset)} videos, none are corrupted/missing.')