|
|
|
|
|
from typing import Dict, List, Optional, Sequence, Tuple, Union
|
|
|
|
|
|
import mmengine
|
|
|
import numpy as np
|
|
|
from mmengine.infer import BaseInferencer
|
|
|
from mmengine.structures import InstanceData
|
|
|
|
|
|
from mmaction.utils import ConfigType
|
|
|
from .actionrecog_inferencer import ActionRecogInferencer
|
|
|
|
|
|
# --- Input aliases ---------------------------------------------------------
# A single input item is either a string or an in-memory array.
InputType = Union[str, np.ndarray]
# What callers may pass in: one item, or a sequence of items.
InputsType = Union[InputType, Sequence[InputType]]

# --- Prediction / result aliases -------------------------------------------
# A list of ``InstanceData`` objects.
InstanceList = List[InstanceData]
# Model predictions: one ``InstanceData`` or a list of them.
PredType = Union[InstanceData, InstanceList]
# Post-processed results: plain dict(s) or anything ``PredType`` allows.
# ``typing`` flattens the nested union, so this is the same type as
# ``Union[Dict, List[Dict], InstanceData, List[InstanceData]]``.
ResType = Union[Dict, List[Dict], PredType]
|
|
|
|
|
|
|
|
|
class MMAction2Inferencer(BaseInferencer):
    """MMAction2 Inferencer.

    A unified inferencer interface for video analysis tasks. It currently
    covers a single task, action recognition (``rec``), which is delegated
    to an internal :class:`ActionRecogInferencer`, and can be used to
    perform end-to-end action recognition inference.

    Args:
        rec (str, optional): Pretrained action recognition algorithm.
            It's the path to the config file or the model name defined in
            metafile. For example, it could be:

            - model alias, e.g. ``'slowfast'``,
            - config name, e.g. ``'slowfast_r50_8xb8-8x8x1-256e_kinetics400
              -rgb'``,
            - config path

            Defaults to ``None``, but a value is required: a ``ValueError``
            is raised when it is left as ``None``.
        rec_weights (str, optional): Path to the custom checkpoint file of
            the selected rec model. If it is not specified and "rec" is a
            model name of metafile, the weights will be loaded from
            metafile. Defaults to None.
        device (str, optional): Device to run inference. For example,
            it could be 'cuda' or 'cpu'. If None, the available
            device will be automatically used. Defaults to None.
        label_file (str, optional): label file for dataset.
        input_format (str): Input video format, Choices are 'video',
            'rawframes', 'array'. 'video' means input data is a video file,
            'rawframes' means input data is a video frame folder, and
            'array' means input data is a np.ndarray. Defaults to 'video'.

    Raises:
        ValueError: If ``rec`` is ``None``.
    """

    # Keyword names accepted by ``__call__`` for each stage; ``__call__``
    # splits its ``**kwargs`` into these groups via ``_dispatch_kwargs``.
    preprocess_kwargs: set = set()
    forward_kwargs: set = set()
    visualize_kwargs: set = {
        'return_vis', 'show', 'wait_time', 'vid_out_dir', 'draw_pred', 'fps',
        'out_type', 'target_resolution'
    }
    postprocess_kwargs: set = {
        'print_result', 'pred_out_file', 'return_datasample'
    }

    def __init__(self,
                 rec: Optional[str] = None,
                 rec_weights: Optional[str] = None,
                 device: Optional[str] = None,
                 label_file: Optional[str] = None,
                 input_format: str = 'video') -> None:
        if rec is None:
            raise ValueError('rec algorithm should provided.')

        # NOTE(review): ``BaseInferencer.__init__`` is not invoked here;
        # presumably because model construction is fully delegated to the
        # wrapped task inferencer below -- confirm before changing.
        self.visualizer = None
        self.num_visualized_imgs = 0

        # ``rec`` is guaranteed to be set at this point, so the recognition
        # inferencer is always built and the mode is always 'rec'.
        self.actionrecog_inferencer = ActionRecogInferencer(
            rec, rec_weights, device, label_file, input_format)
        self.mode = 'rec'

    def _init_pipeline(self, cfg: ConfigType) -> None:
        """Do nothing: this class builds no data pipeline of its own."""
        pass

    def forward(self,
                inputs: InputsType,
                batch_size: int = 1,
                **forward_kwargs) -> Dict[str, list]:
        """Forward the inputs to the model.

        Args:
            inputs (InputsType): The inputs to be forwarded.
            batch_size (int): Batch size. Defaults to 1.
            **forward_kwargs: Extra keyword arguments passed through to the
                action recognition inferencer call.

        Returns:
            Dict: The prediction results. Possibly with keys "rec". The
            value under "rec" holds one single-element list per prediction
            returned by the action recognition inferencer.
        """
        result = {}
        if self.mode == 'rec':
            predictions = self.actionrecog_inferencer(
                inputs,
                return_datasamples=True,
                batch_size=batch_size,
                **forward_kwargs)['predictions']
            # Wrap every prediction in its own list so that each input maps
            # to a list of instances.
            result['rec'] = [[p] for p in predictions]

        return result

    def visualize(self, inputs: InputsType, preds: Dict[str, list],
                  **kwargs) -> Optional[List[np.ndarray]]:
        """Visualize predictions.

        The call is delegated to the action recognition inferencer's
        ``visualize`` method; ``**kwargs`` are forwarded unchanged, so the
        effective default of every option below is the one defined there.

        Args:
            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
            preds (Dict): Predictions of the model, as returned by
                :meth:`forward`.
            show (bool): Whether to display the image in a popup window.
            wait_time (float): The interval of show (s).
            draw_pred (bool): Whether to draw predictions.
            fps (int): Frames per second for saving video.
            out_type (str): Output format type.
            target_resolution (Tuple[int], optional): Set to
                (desired_width desired_height) to have resized frames. If
                either dimension is None, the frames are resized by keeping
                the existing aspect ratio.
            vid_out_dir (str): Output directory of visualization results.
            return_vis (bool): Forwarded to the action recognition
                inferencer.

        Returns:
            Whatever the action recognition inferencer's ``visualize``
            returns, or ``None`` when the mode does not include 'rec'.
        """
        if 'rec' in self.mode:
            # ``preds['rec']`` holds one list of predictions per input.
            # Flatten it so the wrapped inferencer receives one prediction
            # per input; indexing ``[0]`` would hand over only the first
            # input's prediction while still passing every input.
            rec_preds = [
                pred for per_input in preds['rec'] for pred in per_input
            ]
            return self.actionrecog_inferencer.visualize(
                inputs, rec_preds, **kwargs)
        return None

    def __call__(
        self,
        inputs: InputsType,
        batch_size: int = 1,
        **kwargs,
    ) -> dict:
        """Call the inferencer.

        Args:
            inputs (InputsType): Inputs for the inferencer. It can be a
                single input (e.g. a path or an array) or a list / tuple of
                them.
            batch_size (int): Batch size. Defaults to 1.
            **kwargs: Key words arguments passed to :meth:`preprocess`,
                :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
                Each key in kwargs should be in the corresponding set of
                ``preprocess_kwargs``, ``forward_kwargs``,
                ``visualize_kwargs`` and ``postprocess_kwargs``. For
                example, ``return_datasample`` is handled by
                :meth:`postprocess`.

        Returns:
            dict: Inference and visualization results.
        """
        # No separate preprocess step runs here, so the preprocess group
        # is dispatched but intentionally left unused.
        (
            preprocess_kwargs,
            forward_kwargs,
            visualize_kwargs,
            postprocess_kwargs,
        ) = self._dispatch_kwargs(**kwargs)

        ori_inputs = self._inputs_to_list(inputs)

        preds = self.forward(ori_inputs, batch_size, **forward_kwargs)

        visualization = self.visualize(ori_inputs, preds, **visualize_kwargs)
        results = self.postprocess(preds, visualization, **postprocess_kwargs)
        return results

    def _inputs_to_list(self, inputs: InputsType) -> list:
        """Preprocess the inputs to a list. The main difference from mmengine
        version is that we don't list a directory cause input could be a
        frame folder.

        Preprocess inputs to a list according to its type:

        - list or tuple: return a list with the same items
        - anything else (e.g. a str or an array): return a single-element
          list containing it. A string could be a path to file, a url or
          other types of string according to the task.

        Args:
            inputs (InputsType): Inputs for the inferencer.

        Returns:
            list: List of input for the :meth:`preprocess`.
        """
        if not isinstance(inputs, (list, tuple)):
            inputs = [inputs]

        return list(inputs)

    def postprocess(self,
                    preds: Dict[str, list],
                    visualization: Optional[List[np.ndarray]] = None,
                    print_result: bool = False,
                    pred_out_file: str = '',
                    return_datasample: bool = False) -> dict:
        """Postprocess predictions.

        Args:
            preds (Dict): Predictions of the model, as returned by
                :meth:`forward`.
            visualization (Optional[np.ndarray]): Visualized predictions.
            print_result (bool): Whether to print the result.
                Defaults to False.
            pred_out_file (str): Output file name to store predictions
                without images. Supported file formats are "json",
                "yaml/yml" and "pickle/pkl". Defaults to ''.
            return_datasample (bool): If True, skip the conversion to plain
                dicts and return the raw prediction objects of each input
                under the key "rec". Note that raw prediction objects may
                not be serializable by every ``pred_out_file`` format.
                Defaults to False.

        Returns:
            dict: A dict with two keys. "predictions" is a list with one
            dict per input; possible keys of each dict are "rec_labels" and
            "rec_scores" (or "rec" when ``return_datasample`` is True).
            "visualization" is the ``visualization`` argument, unchanged.
        """
        result_dict = {}
        # One result slot per input; an empty ``preds`` yields no slots
        # instead of raising ``StopIteration``.
        num_inputs = len(next(iter(preds.values()), ()))
        pred_results = [{} for _ in range(num_inputs)]

        if 'rec' in self.mode:
            for i, rec_pred in enumerate(preds['rec']):
                if return_datasample:
                    pred_results[i]['rec'] = list(rec_pred)
                    continue
                rec_dicts = [
                    self.actionrecog_inferencer.pred2dict(instance)
                    for instance in rec_pred
                ]
                pred_results[i].update(
                    rec_labels=[d['pred_labels'] for d in rec_dicts],
                    rec_scores=[d['pred_scores'] for d in rec_dicts])

        result_dict['predictions'] = pred_results
        if print_result:
            print(result_dict)
        if pred_out_file != '':
            mmengine.dump(result_dict, pred_out_file)
        # Attached after printing / dumping so that images are neither
        # printed nor written to the prediction file.
        result_dict['visualization'] = visualization
        return result_dict
|
|
|
|