Spaces:

AIDetect-benchmarked
/

Deepfake-Detector

Sleeping

App Files Files Community

Deepfake-Detector / mmaction /apis /inferencers /mmaction2_inferencer.py

AZIIIIIIIIZ

Upload 1039 files

d670799 verified 3 months ago

raw

history blame contribute delete

9.45 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	from typing import Dict, List, Optional, Sequence, Tuple, Union

	import mmengine
	import numpy as np
	from mmengine.infer import BaseInferencer
	from mmengine.structures import InstanceData

	from mmaction.utils import ConfigType
	from .actionrecog_inferencer import ActionRecogInferencer

	# Shared type aliases used in the inferencer signatures below.
	# A list of mmengine ``InstanceData`` objects.
	InstanceList = List[InstanceData]
	# One input item: a string (e.g. a path) or an in-memory array.
	InputType = Union[str, np.ndarray]
	# Either a single input item or a sequence of input items.
	InputsType = Union[InputType, Sequence[InputType]]
	# A single ``InstanceData`` or a list of them.
	PredType = Union[InstanceData, InstanceList]
	# Post-processed results: plain dict(s) or anything ``PredType`` allows.
	# ``Union`` flattens the nested ``PredType``, so this is the same type as
	# ``Union[Dict, List[Dict], InstanceData, List[InstanceData]]``.
	ResType = Union[Dict, List[Dict], PredType]


	class MMAction2Inferencer(BaseInferencer):
	    """MMAction2 Inferencer.

	    A unified inferencer interface for video analysis tasks. It currently
	    wraps a single task, action recognition (``rec``), and can be used to
	    perform end-to-end action recognition inference. All model work is
	    delegated to an :class:`ActionRecogInferencer` instance.

	    Args:
	        rec (str, optional): Pretrained action recognition algorithm.
	            It's the path to the config file or the model name defined in
	            metafile. For example, it could be:

	            - model alias, e.g. ``'slowfast'``,
	            - config name, e.g. ``'slowfast_r50_8xb8-8x8x1-256e_kinetics400
	              -rgb'``,
	            - config path

	            Defaults to ``None``, but a value is required: passing ``None``
	            raises ``ValueError``.
	        rec_weights (str, optional): Path to the custom checkpoint file of
	            the selected rec model. If it is not specified and "rec" is a
	            model name of metafile, the weights will be loaded from
	            metafile. Defaults to None.
	        device (str, optional): Device to run inference. For example,
	            it could be 'cuda' or 'cpu'. If None, the available
	            device will be automatically used. Defaults to None.
	        label_file (str, optional): Label file for dataset.
	            Defaults to None.
	        input_format (str): Input video format. Choices are 'video',
	            'rawframes', 'array'. 'video' means input data is a video file,
	            'rawframes' means input data is a video frame folder, and
	            'array' means input data is a np.ndarray. Defaults to 'video'.

	    Raises:
	        ValueError: If ``rec`` is None.
	    """

	    # Keyword names accepted by ``__call__`` and routed by
	    # ``_dispatch_kwargs`` to the stage with the matching set.
	    preprocess_kwargs: set = set()
	    forward_kwargs: set = set()
	    visualize_kwargs: set = {
	        'return_vis', 'show', 'wait_time', 'vid_out_dir', 'draw_pred', 'fps',
	        'out_type', 'target_resolution'
	    }
	    postprocess_kwargs: set = {
	        'print_result', 'pred_out_file', 'return_datasample'
	    }

	    def __init__(self,
	                 rec: Optional[str] = None,
	                 rec_weights: Optional[str] = None,
	                 device: Optional[str] = None,
	                 label_file: Optional[str] = None,
	                 input_format: str = 'video') -> None:

	        if rec is None:
	            raise ValueError('rec algorithm should provided.')

	        # ``BaseInferencer.__init__`` is not called here; this wrapper only
	        # sets the attributes below and builds the task inferencer.
	        self.visualizer = None
	        self.num_visualized_imgs = 0

	        # ``rec`` is guaranteed to be set at this point, so the recognition
	        # inferencer is always created.
	        self.actionrecog_inferencer = ActionRecogInferencer(
	            rec, rec_weights, device, label_file, input_format)
	        self.mode = 'rec'

	    def _init_pipeline(self, cfg: ConfigType) -> None:
	        """Do nothing; this class builds no pipeline of its own."""
	        pass

	    def forward(self,
	                inputs: InputsType,
	                batch_size: int = 1,
	                **forward_kwargs) -> Dict:
	        """Forward the inputs to the model.

	        Args:
	            inputs (InputsType): The inputs to be forwarded.
	            batch_size (int): Batch size. Defaults to 1.
	            **forward_kwargs: Extra keyword arguments passed through to the
	                wrapped action recognition inferencer call.

	        Returns:
	            Dict: The prediction results. Possibly with keys "rec". The
	            value of "rec" is a list with one single-element list per
	            prediction returned by the wrapped inferencer.
	        """
	        result = {}
	        if self.mode == 'rec':
	            predictions = self.actionrecog_inferencer(
	                inputs,
	                return_datasamples=True,
	                batch_size=batch_size,
	                **forward_kwargs)['predictions']
	            # Wrap every prediction in its own list so that postprocess()
	            # can iterate over "instances" per input.
	            result['rec'] = [[p] for p in predictions]

	        return result

	    def visualize(self, inputs: InputsType, preds: Dict,
	                  **kwargs) -> Optional[List[np.ndarray]]:
	        """Visualize predictions.

	        Args:
	            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
	            preds (Dict): Predictions of the model, as returned by
	                :meth:`forward`.
	            show (bool): Whether to display the image in a popup window.
	                Defaults to False.
	            wait_time (float): The interval of show (s). Defaults to 0.
	            draw_pred (bool): Whether to draw predicted bounding boxes.
	                Defaults to True.
	            fps (int): Frames per second for saving video. Defaults to 4.
	            out_type (str): Output format type, choose from 'img', 'gif',
	                'video'. Defaults to ``'img'``.
	            target_resolution (Tuple[int], optional): Set to
	                (desired_width desired_height) to have resized frames. If
	                either dimension is None, the frames are resized by keeping
	                the existing aspect ratio. Defaults to None.
	            vid_out_dir (str): Output directory of visualization results.
	                If left as empty, no file will be saved. Defaults to ''.

	        Returns:
	            Optional[List[np.ndarray]]: Whatever the wrapped action
	            recognition inferencer's ``visualize`` returns, or None when the
	            current mode does not include 'rec'.
	        """
	        if 'rec' not in self.mode:
	            return None

	        # forward() stores one single-element list per input. Flatten them
	        # so that every input is handed over together with its own
	        # prediction; taking only ``preds['rec'][0]`` would pass the first
	        # input's prediction alone and fail on empty input.
	        # NOTE(review): assumes ActionRecogInferencer.visualize pairs
	        # ``inputs`` and predictions one-to-one - confirm.
	        rec_preds = [
	            pred for per_input in preds['rec'] for pred in per_input
	        ]
	        return self.actionrecog_inferencer.visualize(
	            inputs, rec_preds, **kwargs)

	    def __call__(
	        self,
	        inputs: InputsType,
	        batch_size: int = 1,
	        **kwargs,
	    ) -> dict:
	        """Call the inferencer.

	        Args:
	            inputs (InputsType): Inputs for the inferencer. It can be a
	                single input or a list / tuple of inputs.
	            batch_size (int): Batch size. Defaults to 1.
	            **kwargs: Key words arguments passed to :meth:`preprocess`,
	                :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
	                Each key in kwargs should be in the corresponding set of
	                ``preprocess_kwargs``, ``forward_kwargs``,
	                ``visualize_kwargs`` and ``postprocess_kwargs``.

	        Returns:
	            dict: Inference and visualization results.
	        """
	        # The preprocess kwargs are dispatched but unused: this class has
	        # no preprocessing step of its own.
	        (
	            preprocess_kwargs,
	            forward_kwargs,
	            visualize_kwargs,
	            postprocess_kwargs,
	        ) = self._dispatch_kwargs(**kwargs)

	        ori_inputs = self._inputs_to_list(inputs)

	        preds = self.forward(ori_inputs, batch_size, **forward_kwargs)

	        visualization = self.visualize(
	            ori_inputs, preds,
	            **visualize_kwargs)  # type: ignore  # noqa: E501
	        results = self.postprocess(preds, visualization, **postprocess_kwargs)
	        return results

	    def _inputs_to_list(self, inputs: InputsType) -> list:
	        """Preprocess the inputs to a list. The main difference from mmengine
	        version is that we don't list a directory cause input could be a
	        frame folder.

	        Preprocess inputs to a list according to its type:

	        - list or tuple: return a new list with the same items
	        - anything else (e.g. str or array): return a list containing that
	          single object. A string could be a path to file, a url or other
	          types of string according to the task.

	        Args:
	            inputs (InputsType): Inputs for the inferencer.

	        Returns:
	            list: List of input for the :meth:`preprocess`.
	        """
	        if not isinstance(inputs, (list, tuple)):
	            inputs = [inputs]

	        return list(inputs)

	    def postprocess(self,
	                    preds: Dict,
	                    visualization: Optional[List[np.ndarray]] = None,
	                    print_result: bool = False,
	                    pred_out_file: str = '',
	                    return_datasample: bool = False
	                    ) -> Union[ResType, Tuple[ResType, np.ndarray]]:
	        """Postprocess predictions.

	        Args:
	            preds (Dict): Predictions of the model, as returned by
	                :meth:`forward`.
	            visualization (Optional[List[np.ndarray]]): Visualized
	                predictions. Defaults to None.
	            print_result (bool): Whether to print the result.
	                Defaults to False.
	            pred_out_file (str): Output file name to store predictions
	                without images. Supported file formats are "json",
	                "yaml/yml" and "pickle/pkl". Defaults to ''.
	            return_datasample (bool): If True, ``predictions`` holds the
	                raw prediction objects found in ``preds['rec']`` (flattened
	                into one list) instead of label / score dicts. This name is
	                listed in ``postprocess_kwargs``, so it must be accepted
	                here or ``__call__`` fails with ``TypeError``.
	                Defaults to False.

	        Returns:
	            dict: A dict with two keys. "predictions" is a list with one
	            entry per input; unless ``return_datasample`` is set, each entry
	            is a dict with keys "rec_labels" and "rec_scores".
	            "visualization" is the ``visualization`` argument unchanged.
	        """
	        if return_datasample:
	            pred_results = [
	                pred for per_input in preds.get('rec', [])
	                for pred in per_input
	            ]
	        else:
	            # One result dict per input; the count is taken from the first
	            # entry of ``preds`` (an empty ``preds`` yields no results).
	            num_inputs = len(next(iter(preds.values()), ()))
	            pred_results = [{} for _ in range(num_inputs)]
	            if 'rec' in self.mode:
	                for i, rec_pred in enumerate(preds['rec']):
	                    result = dict(rec_labels=[], rec_scores=[])
	                    for rec_pred_instance in rec_pred:
	                        rec_dict_res = self.actionrecog_inferencer.pred2dict(
	                            rec_pred_instance)
	                        result['rec_labels'].append(
	                            rec_dict_res['pred_labels'])
	                        result['rec_scores'].append(
	                            rec_dict_res['pred_scores'])
	                    pred_results[i].update(result)

	        result_dict = {'predictions': pred_results}
	        if print_result:
	            print(result_dict)
	        if pred_out_file != '':
	            mmengine.dump(result_dict, pred_out_file)
	        # Attached only after printing / dumping so that the visualization
	        # is neither printed nor written to ``pred_out_file``.
	        result_dict['visualization'] = visualization
	        return result_dict