Spaces:

nguyenminh4099
/

Demo

Paused

App Files Files Community

Demo / src /models /taskers /checker.py

nguyenminh4099

Upload folder using huggingface_hub

9411c06 verified over 1 year ago

raw

history blame contribute delete

3.77 kB

	import os
	import subprocess
	import json

	from src.models.taskers.tasker import Tasker
	from src.models.utils.logging import get_logger

	logger = get_logger(
	name='Checker',
	is_stream=True,
	log_path=None,
	)


	class Checker(Tasker):
	"""Check condition of sample"""

	_SAMPLE_RATE: int = 16000
	_FRAME_RATE: int = 25
	_EXTENSION: str = '.mp4'

	def __init__(self, duration_threshold: int = 3, **kwargs):
	"""
	Initialize checker

	path_file:
	Path to video file
	args:
	Positional arguments
	kwargs:
	Keyword arguments.
	"""
	super().__init__()
	self._SAMPLE_RATE = kwargs.get('sample_rate', self._FRAME_RATE)
	self._FRAME_RATE = kwargs.get('frame_rate', self._SAMPLE_RATE)
	self._EXTENSION = kwargs.get('extension', self._EXTENSION)
	self._threshold_duration: int = duration_threshold

	def do(self, video_path: str, args, *kwargs) -> dict:

	_stream_dict = self._get_metadata_streams(video_path=video_path)

	metadata_dict = dict()
	metadata_dict['video_path'] = video_path
	metadata_dict['duration'] = _stream_dict['duration']
	metadata_dict['has_v'] = 'visual' in _stream_dict
	metadata_dict['has_a'] = 'audio' in _stream_dict
	metadata_dict['v_codec'] = None
	metadata_dict['a_codec'] = None
	if metadata_dict['has_v']:
	metadata_dict['v_codec'] = _stream_dict['visual']['codec_name']
	if metadata_dict['has_a']:
	metadata_dict['a_codec'] = _stream_dict['audio']['codec_name']

	self.post_do(metadata_dict)
	return metadata_dict

	def post_do(self, metadata_dict: dict):
	if metadata_dict['duration'] > self._threshold_duration:
	logger.critical("Duration of video (%.3f) over threshold (%.3f)" % (metadata_dict['duration'], self._threshold_duration))
	raise RuntimeError()
	n_con = 0
	if not metadata_dict['has_v']:
	logger.info(f"Video in '{metadata_dict['video_path']}\' has no visual.")
	n_con += 1
	if not metadata_dict['has_a']:
	logger.info(f"Video in '{metadata_dict['video_path']}\' has no audio.")
	n_con += 1
	if n_con == 2:
	logger.critical(f"INVALID. Video in '{metadata_dict['video_path']}\' has no visual and audio.")
	raise RuntimeError()

	def _get_metadata_streams(self, video_path: str) -> dict:
	"""
	Get metadata of streams

	Return:
	Dictionary contains metadata.
	"""
	if not os.path.isfile(path=video_path):
	logger.error(f"The video file '{video_path}' not exist.")
	raise RuntimeError()

	cmd = [
	"ffprobe",
	"-v", "error",
	"-show_entries",
	"stream=codec_type:"
	"stream=codec_name:"
	"stream=index:"
	"stream=sample_rate:"
	"stream=r_frame_rate:"
	"format=duration",
	"-print_format", "json",
	"%s" % video_path
	]
	stdout = subprocess.run(
	cmd,
	shell=False,
	capture_output=True,
	stdout=None,
	stderr=None,
	cwd=os.getcwd(),
	).stdout
	result = json.loads(stdout)

	_streams_dict = dict()
	_streams_dict['duration'] = float(result['format']['duration'])

	for stream in result['streams']:
	if stream['codec_type'] == 'video':
	_streams_dict['visual'] = stream
	elif stream['codec_type'] == 'audio':
	_streams_dict['audio'] = stream

	return _streams_dict