Spaces:

AIDetect-benchmarked
/

Deepfake-Detector

Sleeping

App Files Files Community

Deepfake-Detector / tools /analysis_tools /check_videos.py

AZIIIIIIIIZ

Upload 1039 files

d670799 verified 3 months ago

raw

history blame contribute delete

5.62 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import argparse
	import os
	import warnings
	from functools import partial
	from multiprocessing import Manager, cpu_count

	import numpy as np
	from mmengine import Config, DictAction, track_parallel_progress
	from mmengine.registry import init_default_scope

	from mmaction.registry import DATASETS, TRANSFORMS


	def parse_args():
	parser = argparse.ArgumentParser(description='MMAction2 check datasets')
	parser.add_argument('config', help='test config file path')
	parser.add_argument(
	'--options',
	nargs='+',
	action=DictAction,
	default={},
	help='custom options for evaluation, the key-value pair in xxx=yyy '
	'format will be kwargs for dataset.evaluate() function (deprecate), '
	'change to --eval-options instead.')
	parser.add_argument(
	'--cfg-options',
	nargs='+',
	action=DictAction,
	default={},
	help='override some settings in the used config, the key-value pair '
	'in xxx=yyy format will be merged into config file. For example, '
	"'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
	parser.add_argument(
	'--output-file',
	default='invalid-video.txt',
	help='Output file path which keeps corrupted/missing video file paths')
	parser.add_argument(
	'--split',
	default='train',
	choices=['train', 'val', 'test'],
	help='Dataset split')
	parser.add_argument(
	'--decoder',
	default='decord',
	choices=['decord', 'opencv', 'pyav'],
	help='Video decoder type, should be one of [decord, opencv, pyav]')
	parser.add_argument(
	'--nproc',
	type=int,
	default=(cpu_count() - 1 or 1),
	help='Number of processes to check videos')
	parser.add_argument(
	'--remove-corrupted-videos',
	action='store_true',
	help='Whether to delete all corrupted videos')
	args = parser.parse_args()

	if args.options and args.eval_options:
	raise ValueError(
	'--options and --eval-options cannot be both '
	'specified, --options is deprecated in favor of --eval-options')
	if args.options:
	warnings.warn('--options is deprecated in favor of --eval-options')
	args.eval_options = args.options
	return args


	@TRANSFORMS.register_module()
	class RandomSampleFrames:

	def __call__(self, results):
	"""Select frames to verify.

	Select the first, last and three random frames, Required key is
	"total_frames", added or modified key is "frame_inds".
	Args:
	results (dict): The resulting dict to be modified and passed
	to the next transform in pipeline.
	"""
	assert results['total_frames'] > 0

	# first and last frames
	results['frame_inds'] = np.array([0, results['total_frames'] - 1])

	# choose 3 random frames
	if results['total_frames'] > 2:
	results['frame_inds'] = np.concatenate([
	results['frame_inds'],
	np.random.randint(1, results['total_frames'] - 1, 3)
	])

	return results


	def _do_check_videos(lock, pipeline, output_file, data_info):
	try:
	pipeline(data_info)
	except: # noqa
	# save invalid video path to output file
	lock.acquire()
	with open(output_file, 'a') as f:
	f.write(data_info['filename'] + '\n')
	lock.release()


	if __name__ == '__main__':
	args = parse_args()

	decoder_to_pipeline_prefix = dict(
	decord='Decord', opencv='OpenCV', pyav='PyAV')

	# read config file
	cfg = Config.fromfile(args.config)
	cfg.merge_from_dict(args.cfg_options)
	init_default_scope(cfg.get('default_scope', 'mmaction'))

	# build dataset
	dataset_cfg = cfg.get(f'{args.split}_dataloader').dataset
	dataset_type = dataset_cfg.type
	assert dataset_type == 'VideoDataset'
	dataset_cfg.pipeline = [
	dict(type=decoder_to_pipeline_prefix[args.decoder] + 'Init'),
	dict(type='RandomSampleFrames'),
	dict(type=decoder_to_pipeline_prefix[args.decoder] + 'Decode')
	]

	dataset = DATASETS.build(dataset_cfg)
	dataset_cfg.pop('type')
	pipeline = dataset.pipeline

	# prepare for checking
	if os.path.exists(args.output_file):
	# remove existing output file
	os.remove(args.output_file)

	lock = Manager().Lock()
	worker_fn = partial(_do_check_videos, lock, pipeline, args.output_file)
	# avoid copy dataset for multiprocess
	data_info_list = [
	dataset.get_data_info(idx) for idx in range(len(dataset))
	]

	# start checking
	track_parallel_progress(worker_fn, data_info_list, nproc=args.nproc)

	if os.path.exists(args.output_file):
	num_lines = sum(1 for _ in open(args.output_file))
	print(f'Checked {len(dataset)} videos, '
	f'{num_lines} are corrupted/missing.')
	if args.remove_corrupted_videos:
	print('Start deleting corrupted videos')
	cnt = 0
	with open(args.output_file, 'r') as f:
	for line in f:
	if os.path.exists(line.strip()):
	os.remove(line.strip())
	cnt += 1
	print(f'Deleted {cnt} corrupted videos.')
	else:
	print(f'Checked {len(dataset)} videos, none are corrupted/missing')