Deepfake-Detector / mmaction /apis /inferencers /mmaction2_inferencer.py
AZIIIIIIIIZ's picture
Upload 1039 files
d670799 verified
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Sequence, Tuple, Union
import mmengine
import numpy as np
from mmengine.infer import BaseInferencer
from mmengine.structures import InstanceData
from mmaction.utils import ConfigType
from .actionrecog_inferencer import ActionRecogInferencer
# A list of ``InstanceData`` objects.
InstanceList = List[InstanceData]
# A single input item: either a string or a numpy array.
InputType = Union[str, np.ndarray]
# One input item, or a sequence of input items.
InputsType = Union[InputType, Sequence[InputType]]
# A single ``InstanceData`` or a list of them.
PredType = Union[InstanceData, InstanceList]
# Result container: dict(s) or ``InstanceData`` object(s).
ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]
class MMAction2Inferencer(BaseInferencer):
    """MMAction2 Inferencer.

    A unified inferencer interface for video analysis tasks. It currently
    supports a single task, action recognition (``rec``), and can be used to
    perform end-to-end action recognition inference by delegating to an
    :class:`ActionRecogInferencer`.

    Args:
        rec (str, optional): Pretrained action recognition algorithm.
            It's the path to the config file or the model name defined in
            metafile. For example, it could be:

            - model alias, e.g. ``'slowfast'``,
            - config name, e.g. ``'slowfast_r50_8xb8-8x8x1-256e_kinetics400
              -rgb'``,
            - config path

            Although the default is ``None``, a value is required:
            ``None`` raises ``ValueError``.
        rec_weights (str, optional): Path to the custom checkpoint file of
            the selected rec model. If it is not specified and "rec" is a
            model name of metafile, the weights will be loaded from metafile.
            Defaults to None.
        device (str, optional): Device to run inference. For example,
            it could be 'cuda' or 'cpu'. If None, the available
            device will be automatically used. Defaults to None.
        label_file (str, optional): label file for dataset.
        input_format (str): Input video format, Choices are 'video',
            'rawframes', 'array'. 'video' means input data is a video file,
            'rawframes' means input data is a video frame folder, and 'array'
            means input data is a np.ndarray. Defaults to 'video'.

    Raises:
        ValueError: If ``rec`` is None.
    """

    # Keyword names accepted by ``__call__`` and routed to each stage by
    # ``_dispatch_kwargs``.
    preprocess_kwargs: set = set()
    forward_kwargs: set = set()
    visualize_kwargs: set = {
        'return_vis', 'show', 'wait_time', 'vid_out_dir', 'draw_pred', 'fps',
        'out_type', 'target_resolution'
    }
    postprocess_kwargs: set = {
        'print_result', 'pred_out_file', 'return_datasample'
    }

    def __init__(self,
                 rec: Optional[str] = None,
                 rec_weights: Optional[str] = None,
                 device: Optional[str] = None,
                 label_file: Optional[str] = None,
                 input_format: str = 'video') -> None:
        if rec is None:
            raise ValueError('rec algorithm should provided.')

        # NOTE(review): ``BaseInferencer.__init__`` is not invoked here; the
        # model is built by the wrapped ``ActionRecogInferencer`` instead --
        # presumably intentional, confirm before adding a ``super()`` call.
        self.visualizer = None
        self.num_visualized_imgs = 0

        self.actionrecog_inferencer = ActionRecogInferencer(
            rec, rec_weights, device, label_file, input_format)
        self.mode = 'rec'

    def _init_pipeline(self, cfg: ConfigType) -> None:
        """No-op: this class builds no pipeline of its own."""
        pass

    def forward(self, inputs: InputsType, batch_size: int,
                **forward_kwargs) -> Dict:
        """Forward the inputs to the model.

        Args:
            inputs (InputsType): The inputs to be forwarded.
            batch_size (int): Batch size passed to the wrapped inferencer.
            **forward_kwargs: Extra keyword arguments passed through to the
                wrapped action recognition inferencer.

        Returns:
            Dict: The prediction results. Possibly with key "rec", which
            maps to a list holding one single-element list of predictions
            per entry returned by the wrapped inferencer.
        """
        result = {}
        if self.mode == 'rec':
            predictions = self.actionrecog_inferencer(
                inputs,
                return_datasamples=True,
                batch_size=batch_size,
                **forward_kwargs)['predictions']
            # Wrap every prediction in its own list so that each input owns
            # a list of prediction instances.
            result['rec'] = [[p] for p in predictions]

        return result

    def visualize(self, inputs: InputsType, preds: Dict,
                  **kwargs) -> Optional[List[np.ndarray]]:
        """Visualize predictions.

        All keyword arguments are forwarded unchanged to
        ``ActionRecogInferencer.visualize``, which also defines their
        default values.

        Args:
            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
            preds (Dict): Predictions of the model, as returned by
                :meth:`forward`.
            return_vis (bool): Presumably whether to return the visualized
                frames -- confirm against ``ActionRecogInferencer``.
            show (bool): Whether to display the image in a popup window.
            wait_time (float): The interval of show (s).
            draw_pred (bool): Whether to draw the predictions.
            fps (int): Frames per second for saving video.
            out_type (str): Output format type, choose from 'img', 'gif',
                'video'.
            target_resolution (Tuple[int], optional): Set to
                (desired_width desired_height) to have resized frames. If
                either dimension is None, the frames are resized by keeping
                the existing aspect ratio.
            vid_out_dir (str): Output directory of visualization results.
                If left as empty, no file will be saved.

        Returns:
            Whatever ``ActionRecogInferencer.visualize`` returns, or None
            when the current mode does not include "rec".
        """
        if 'rec' in self.mode:
            # NOTE(review): only the prediction list of the FIRST input is
            # forwarded; with several inputs the remaining predictions are
            # not passed to the visualizer -- confirm this is intended.
            return self.actionrecog_inferencer.visualize(
                inputs, preds['rec'][0], **kwargs)

    def __call__(
        self,
        inputs: InputsType,
        batch_size: int = 1,
        **kwargs,
    ) -> dict:
        """Call the inferencer.

        Args:
            inputs (InputsType): Inputs for the inferencer. It can be a path
                to image / image directory, or an array, or a list of these.
            batch_size (int): Batch size. Defaults to 1.
            return_datasample (bool): Whether to return the raw data samples
                instead of label/score dicts. Defaults to False.
            **kwargs: Key words arguments passed to :meth:`preprocess`,
                :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
                Each key in kwargs should be in the corresponding set of
                ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
                and ``postprocess_kwargs``.

        Returns:
            dict: Inference and visualization results.
        """
        (
            preprocess_kwargs,
            forward_kwargs,
            visualize_kwargs,
            postprocess_kwargs,
        ) = self._dispatch_kwargs(**kwargs)

        ori_inputs = self._inputs_to_list(inputs)

        preds = self.forward(ori_inputs, batch_size, **forward_kwargs)

        visualization = self.visualize(
            ori_inputs, preds,
            **visualize_kwargs)  # type: ignore  # noqa: E501
        results = self.postprocess(preds, visualization, **postprocess_kwargs)
        return results

    def _inputs_to_list(self, inputs: InputsType) -> list:
        """Preprocess the inputs to a list. The main difference from mmengine
        version is that we don't list a directory cause input could be a frame
        folder.

        Preprocess inputs to a list according to its type:

        - list or tuple: return inputs
        - str: return a list containing the string. The string
          could be a path to file, a url or other types of string according
          to the task.

        Args:
            inputs (InputsType): Inputs for the inferencer.

        Returns:
            list: List of input for the :meth:`preprocess`.
        """
        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]

        return list(inputs)

    def postprocess(self,
                    preds: Dict,
                    visualization: Optional[List[np.ndarray]] = None,
                    print_result: bool = False,
                    pred_out_file: str = '',
                    return_datasample: bool = False
                    ) -> Union[ResType, Tuple[ResType, np.ndarray]]:
        """Postprocess predictions.

        Args:
            preds (Dict): Predictions of the model.
            visualization (Optional[np.ndarray]): Visualized predictions.
            print_result (bool): Whether to print the result.
                Defaults to False.
            pred_out_file (str): Output file name to store predictions
                without images. Supported file formats are "json", "yaml/yml"
                and "pickle/pkl". Defaults to ''.
            return_datasample (bool): If True, each entry of "predictions"
                is the list of raw data samples of the corresponding input
                instead of a dict of labels and scores. This key is listed
                in ``postprocess_kwargs``, so it must be accepted here;
                otherwise passing it to ``__call__`` ends in a ``TypeError``.
                Defaults to False.

        Returns:
            Dict: A dict with keys "predictions" and "visualization".
            "predictions" holds one entry per input; unless
            ``return_datasample`` is set, each entry is a dict with
            possible keys "rec_labels", "rec_scores".
        """
        result_dict = {}
        # One (initially empty) slot per input, sized from the first task's
        # predictions.
        pred_results = [{} for _ in next(iter(preds.values()))]

        if 'rec' in self.mode:
            for i, rec_pred in enumerate(preds['rec']):
                if return_datasample:
                    # Keep the raw data samples untouched.
                    pred_results[i] = list(rec_pred)
                    continue
                rec_dicts = [
                    self.actionrecog_inferencer.pred2dict(instance)
                    for instance in rec_pred
                ]
                pred_results[i].update(
                    rec_labels=[d['pred_labels'] for d in rec_dicts],
                    rec_scores=[d['pred_scores'] for d in rec_dicts])

        result_dict['predictions'] = pred_results

        if print_result:
            print(result_dict)
        if pred_out_file != '':
            mmengine.dump(result_dict, pred_out_file)

        # The visualization is attached only after printing and dumping, so
        # neither of them includes it.
        result_dict['visualization'] = visualization
        return result_dict