|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import copy |
|
|
import random |
|
|
import warnings |
|
|
from itertools import product |
|
|
from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union |
|
|
|
|
|
import mmengine |
|
|
import numpy as np |
|
|
|
|
|
import mmcv |
|
|
from mmcv.image.geometric import _scale_size |
|
|
from .base import BaseTransform |
|
|
from .builder import TRANSFORMS |
|
|
from .utils import cache_randomness |
|
|
from .wrappers import Compose |
|
|
|
|
|
Number = Union[int, float] |
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class Normalize(BaseTransform):
    """Normalize the image with per-channel mean and std.

    Required Keys:

    - img

    Modified Keys:

    - img

    Added Keys:

    - img_norm_cfg

      - mean
      - std
      - to_rgb

    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB before
            normalizing the image. If ``to_rgb=True``, the order of mean and
            std should be RGB. If ``to_rgb=False``, the order of mean and std
            should be the same order of the image. Defaults to True.
    """

    def __init__(self,
                 mean: Sequence[Number],
                 std: Sequence[Number],
                 to_rgb: bool = True) -> None:
        # Statistics are stored as float32 arrays so they can be handed
        # straight to ``mmcv.imnormalize``.
        self.to_rgb = to_rgb
        self.std = np.array(std, dtype=np.float32)
        self.mean = np.array(mean, dtype=np.float32)

    def transform(self, results: dict) -> dict:
        """Normalize ``results['img']`` and record the parameters used.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: The same dict with ``'img'`` replaced by the normalized
            image and an ``'img_norm_cfg'`` entry holding ``mean``, ``std``
            and ``to_rgb``.
        """
        normalized = mmcv.imnormalize(results['img'], self.mean, self.std,
                                      self.to_rgb)
        results['img'] = normalized
        results['img_norm_cfg'] = {
            'mean': self.mean,
            'std': self.std,
            'to_rgb': self.to_rgb,
        }
        return results

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return (f'{cls_name}(mean={self.mean}, std={self.std}, '
                f'to_rgb={self.to_rgb})')
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class Resize(BaseTransform):
    """Resize images & bbox & seg & keypoints.

    This transform resizes the input image according to ``scale`` or
    ``scale_factor``. Bboxes, seg map and keypoints are then resized with the
    same scale factor.
    If ``scale`` and ``scale_factor`` are both set, ``scale`` is used.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints
    - img_shape

    Added Keys:

    - scale
    - scale_factor
    - keep_ratio

    Args:
        scale (int or tuple): Images scales for resizing. A single int is
            expanded to ``(scale, scale)``. Defaults to None.
        scale_factor (float or tuple[float]): Scale factors for resizing.
            A single number is expanded to a pair. Defaults to None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Defaults to False.
        clip_object_border (bool): Whether to clip the objects
            outside the border of the image. In some dataset like MOT17, the
            gt bboxes are allowed to cross the border of images. Therefore,
            we don't need to clip the gt bboxes in these cases. Defaults to
            True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generate slightly different results. Defaults
            to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.

    Raises:
        AssertionError: If both ``scale`` and ``scale_factor`` are None, or
            a tuple ``scale_factor`` does not have exactly two items.
        TypeError: If ``scale_factor`` is neither a number nor a tuple.
    """

    def __init__(self,
                 scale: Optional[Union[int, Tuple[int, int]]] = None,
                 scale_factor: Optional[Union[float, Tuple[float,
                                                           float]]] = None,
                 keep_ratio: bool = False,
                 clip_object_border: bool = True,
                 backend: str = 'cv2',
                 interpolation='bilinear') -> None:
        assert scale is not None or scale_factor is not None, (
            '`scale` and `scale_factor` can not both be `None`')
        if scale is None:
            self.scale = None
        elif isinstance(scale, int):
            self.scale = (scale, scale)
        else:
            self.scale = scale

        self.backend = backend
        self.interpolation = interpolation
        self.keep_ratio = keep_ratio
        self.clip_object_border = clip_object_border

        if scale_factor is None:
            self.scale_factor = None
        elif (isinstance(scale_factor, (int, float))
              and not isinstance(scale_factor, bool)):
            # Whole-number factors such as ``2`` are as valid as ``2.0``;
            # bool is excluded so that it keeps raising TypeError.
            self.scale_factor = (scale_factor, scale_factor)
        elif isinstance(scale_factor, tuple):
            assert len(scale_factor) == 2
            self.scale_factor = scale_factor
        else:
            raise TypeError(
                'expect scale_factor is float or Tuple(float), but '
                f'get {type(scale_factor)}')

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``results['scale']``.

        Writes ``img``, ``img_shape``, ``scale_factor`` (as
        ``(w_scale, h_scale)``) and ``keep_ratio`` into ``results``. Does
        nothing when ``results['img']`` is missing or None.
        """
        if results.get('img', None) is not None:
            if self.keep_ratio:
                # The single factor returned by ``imrescale`` is discarded;
                # per-axis factors are recomputed from the actual output
                # shape instead.
                img, _ = mmcv.imrescale(
                    results['img'],
                    results['scale'],
                    interpolation=self.interpolation,
                    return_scale=True,
                    backend=self.backend)
                new_h, new_w = img.shape[:2]
                h, w = results['img'].shape[:2]
                w_scale = new_w / w
                h_scale = new_h / h
            else:
                img, w_scale, h_scale = mmcv.imresize(
                    results['img'],
                    results['scale'],
                    interpolation=self.interpolation,
                    return_scale=True,
                    backend=self.backend)
            results['img'] = img
            results['img_shape'] = img.shape[:2]
            results['scale_factor'] = (w_scale, h_scale)
            results['keep_ratio'] = self.keep_ratio

    def _resize_bboxes(self, results: dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``.

        Relies on ``scale_factor`` and ``img_shape`` having been written by
        :meth:`_resize_img`.
        """
        if results.get('gt_bboxes', None) is not None:
            # Tile (w_scale, h_scale) to (w, h, w, h) so it broadcasts over
            # the four box coordinates.
            bboxes = results['gt_bboxes'] * np.tile(
                np.array(results['scale_factor']), 2)
            if self.clip_object_border:
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0,
                                          results['img_shape'][1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0,
                                          results['img_shape'][0])
            results['gt_bboxes'] = bboxes

    def _resize_seg(self, results: dict) -> None:
        """Resize semantic segmentation map with ``results['scale']``.

        Nearest-neighbour interpolation is always used, regardless of
        ``self.interpolation``.
        """
        if results.get('gt_seg_map', None) is not None:
            if self.keep_ratio:
                gt_seg = mmcv.imrescale(
                    results['gt_seg_map'],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = mmcv.imresize(
                    results['gt_seg_map'],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results['gt_seg_map'] = gt_seg

    def _resize_keypoints(self, results: dict) -> None:
        """Resize keypoints with ``results['scale_factor']``.

        The keypoint array is updated in place; only the first two entries
        of the last axis are scaled.
        """
        if results.get('gt_keypoints', None) is not None:
            keypoints = results['gt_keypoints']

            keypoints[:, :, :2] = keypoints[:, :, :2] * np.array(
                results['scale_factor'])
            if self.clip_object_border:
                keypoints[:, :, 0] = np.clip(keypoints[:, :, 0], 0,
                                             results['img_shape'][1])
                keypoints[:, :, 1] = np.clip(keypoints[:, :, 1], 0,
                                             results['img_shape'][0])
            results['gt_keypoints'] = keypoints

    def transform(self, results: dict) -> dict:
        """Transform function to resize images, bounding boxes, semantic
        segmentation map and keypoints.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
            'gt_keypoints', 'scale', 'scale_factor', 'img_shape',
            and 'keep_ratio' keys are updated in result dict.
        """
        if self.scale:
            results['scale'] = self.scale
        else:
            # Derive the target size from the current image; the shape is
            # reversed from (h, w) to (w, h) before scaling.
            img_shape = results['img'].shape[:2]
            results['scale'] = _scale_size(img_shape[::-1],
                                           self.scale_factor)
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_seg(results)
        self._resize_keypoints(results)
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}(scale={self.scale}, '
                f'scale_factor={self.scale_factor}, '
                f'keep_ratio={self.keep_ratio}, '
                f'clip_object_border={self.clip_object_border}, '
                f'backend={self.backend}, '
                f'interpolation={self.interpolation})')
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class Pad(BaseTransform):
    """Pad the image & segmentation map.

    There are three padding modes: (1) pad to a fixed size, (2) pad to the
    minimum size that is divisible by some number, and (3) pad to square.
    Pad to square and pad to the minimum divisible size can be used at the
    same time.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)

    Modified Keys:

    - img
    - gt_seg_map
    - img_shape

    Added Keys:

    - pad_shape
    - pad_fixed_size
    - pad_size_divisor

    Args:
        size (tuple, optional): Fixed padding size.
            Expected padding shape (w, h). Defaults to None.
        size_divisor (int, optional): The divisor of padded size. Defaults to
            None.
        pad_to_square (bool): Whether to pad the image into a square.
            Currently only used for YOLOX. Defaults to False.
        pad_val (Number | dict[str, Number], optional): Padding value used
            when ``padding_mode`` is "constant". If it is a single number,
            the value to pad the image is the number and to pad the semantic
            segmentation map is 255. If it is a dict, it should have the
            following keys:

            - img: The value to pad the image.
            - seg: The value to pad the semantic segmentation map.

            Defaults to dict(img=0, seg=255).
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Defaults to 'constant'.

            - constant: pads with a constant value, this value is specified
              with pad_val.
            - edge: pads with the last value at the edge of the image.
            - reflect: pads with reflection of image without repeating the
              last value on the edge. For example, padding [1, 2, 3, 4] with
              2 elements on both sides in reflect mode will result in
              [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: pads with reflection of image repeating the last
              value on the edge. For example, padding [1, 2, 3, 4] with 2
              elements on both sides in symmetric mode will result in
              [2, 1, 1, 2, 3, 4, 4, 3]

    Raises:
        AssertionError: If ``pad_val`` is neither a number nor a dict, if the
            combination of ``size``/``size_divisor``/``pad_to_square`` is
            invalid, or if ``padding_mode`` is not a supported mode.
    """

    def __init__(self,
                 size: Optional[Tuple[int, int]] = None,
                 size_divisor: Optional[int] = None,
                 pad_to_square: bool = False,
                 pad_val: Union[Number, dict] = dict(img=0, seg=255),
                 padding_mode: str = 'constant') -> None:
        self.size = size
        self.size_divisor = size_divisor
        # A bare number (int or float) only configures the image padding
        # value; the segmentation map keeps the 255 default.
        if isinstance(pad_val, (int, float)):
            pad_val = dict(img=pad_val, seg=255)
        assert isinstance(pad_val, dict), (
            f'pad_val must be a number or a dict, but got {type(pad_val)}')
        self.pad_val = pad_val
        self.pad_to_square = pad_to_square

        if pad_to_square:
            # ``size_divisor`` may still be combined with square padding.
            assert size is None, \
                'The size must be None when pad_to_square is True'
        else:
            assert size is not None or size_divisor is not None, \
                'one of size and size_divisor must be specified'
            assert size is None or size_divisor is None, \
                'only one of size and size_divisor should be valid'
        assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
        self.padding_mode = padding_mode

    def _pad_img(self, results: dict) -> None:
        """Pad ``results['img']`` and record the padding metadata.

        The target shape is resolved in (h, w) order: the square size (if
        requested) is computed first, then rounded up to a multiple of
        ``size_divisor`` (if set); otherwise the fixed ``size`` is used.
        """
        pad_val = self.pad_val.get('img', 0)

        size = None
        if self.pad_to_square:
            max_size = max(results['img'].shape[:2])
            size = (max_size, max_size)
        if self.size_divisor is not None:
            if size is None:
                size = (results['img'].shape[0], results['img'].shape[1])
            pad_h = int(np.ceil(
                size[0] / self.size_divisor)) * self.size_divisor
            pad_w = int(np.ceil(
                size[1] / self.size_divisor)) * self.size_divisor
            size = (pad_h, pad_w)
        elif self.size is not None:
            # ``self.size`` is (w, h); ``mmcv.impad`` is given (h, w).
            size = self.size[::-1]
        if isinstance(pad_val, int) and results['img'].ndim == 3:
            # Expand an int to one value per channel.
            pad_val = tuple(pad_val for _ in range(results['img'].shape[2]))
        padded_img = mmcv.impad(
            results['img'],
            shape=size,
            pad_val=pad_val,
            padding_mode=self.padding_mode)

        results['img'] = padded_img
        results['pad_shape'] = padded_img.shape
        results['pad_fixed_size'] = self.size
        results['pad_size_divisor'] = self.size_divisor
        results['img_shape'] = padded_img.shape[:2]

    def _pad_seg(self, results: dict) -> None:
        """Pad semantic segmentation map according to
        ``results['pad_shape']``.

        Must run after :meth:`_pad_img`, which writes ``pad_shape``.
        """
        if results.get('gt_seg_map', None) is not None:
            pad_val = self.pad_val.get('seg', 255)
            if isinstance(pad_val, int) and results['gt_seg_map'].ndim == 3:
                pad_val = tuple(
                    pad_val for _ in range(results['gt_seg_map'].shape[2]))
            results['gt_seg_map'] = mmcv.impad(
                results['gt_seg_map'],
                shape=results['pad_shape'][:2],
                pad_val=pad_val,
                padding_mode=self.padding_mode)

    def transform(self, results: dict) -> dict:
        """Call function to pad images and semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Updated result dict.
        """
        self._pad_img(results)
        self._pad_seg(results)
        return results

    def __repr__(self):
        return (f'{self.__class__.__name__}(size={self.size}, '
                f'size_divisor={self.size_divisor}, '
                f'pad_to_square={self.pad_to_square}, '
                f'pad_val={self.pad_val}, '
                f'padding_mode={self.padding_mode})')
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class CenterCrop(BaseTransform):
    """Crop the center of the image, segmentation masks, bounding boxes and key
    points. If the crop area exceeds the original image and ``auto_pad`` is
    True, the original image will be padded before cropping.

    Required Keys:

    - img
    - gt_seg_map (optional)
    - gt_bboxes (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - img_shape
    - gt_seg_map (optional)
    - gt_bboxes (optional)
    - gt_keypoints (optional)

    Added Key:

    - pad_shape

    Args:
        crop_size (Union[int, Tuple[int, int]]): Expected size after cropping
            with the format of (w, h). If set to an integer, then cropping
            width and height are equal to this integer.
        auto_pad (bool): Whether to pad the image if it's smaller than the
            ``crop_size``. Defaults to False.
        pad_cfg (dict): Base config for padding. Refer to ``mmcv.Pad`` for
            detail. Defaults to ``dict(type='Pad')``.
        clip_object_border (bool): Whether to clip the objects
            outside the border of the image. In some dataset like MOT17, the
            gt bboxes are allowed to cross the border of images. Therefore,
            we don't need to clip the gt bboxes in these cases.
            Defaults to True.

    Raises:
        AssertionError: If ``crop_size`` is not an int or a 2-tuple, or any
            of its entries is not positive.
    """

    def __init__(self,
                 crop_size: Union[int, Tuple[int, int]],
                 auto_pad: bool = False,
                 pad_cfg: dict = dict(type='Pad'),
                 clip_object_border: bool = True) -> None:
        super().__init__()
        assert isinstance(crop_size, int) or (
            isinstance(crop_size, tuple) and len(crop_size) == 2
        ), ('The expected crop_size is an integer, or a tuple containing two '
            'integers')

        if isinstance(crop_size, int):
            crop_size = (crop_size, crop_size)
        assert crop_size[0] > 0 and crop_size[1] > 0
        self.crop_size = crop_size
        self.auto_pad = auto_pad

        # Copy so the (shared default) config dict is never aliased.
        self.pad_cfg = pad_cfg.copy()

        if 'size' in self.pad_cfg and auto_pad:
            warnings.warn('``size`` is set in ``pad_cfg``,'
                          'however this argument will be overwritten'
                          ' according to crop size and image size')

        self.clip_object_border = clip_object_border

    def _crop_img(self, results: dict, bboxes: np.ndarray) -> None:
        """Crop image.

        Writes the cropped image and sets both ``img_shape`` and
        ``pad_shape`` to its (h, w).

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), location of cropped bboxes.
        """
        if results.get('img', None) is not None:
            img = mmcv.imcrop(results['img'], bboxes=bboxes)
            img_shape = img.shape[:2]
            results['img'] = img
            results['img_shape'] = img_shape
            results['pad_shape'] = img_shape

    def _crop_seg_map(self, results: dict, bboxes: np.ndarray) -> None:
        """Crop semantic segmentation map.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), location of cropped bboxes.
        """
        if results.get('gt_seg_map', None) is not None:
            img = mmcv.imcrop(results['gt_seg_map'], bboxes=bboxes)
            results['gt_seg_map'] = img

    def _crop_bboxes(self, results: dict, bboxes: np.ndarray) -> None:
        """Update bounding boxes according to CenterCrop.

        Boxes are shifted by the crop origin and, when
        ``clip_object_border`` is set, clipped to the shape of the (already
        cropped) ``results['img']``. Missing or None boxes are skipped.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), location of cropped bboxes.
        """
        if results.get('gt_bboxes', None) is not None:
            offset_w = bboxes[0]
            offset_h = bboxes[1]
            bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h])

            gt_bboxes = results['gt_bboxes'] - bbox_offset
            if self.clip_object_border:
                gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0,
                                             results['img'].shape[1])
                gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0,
                                             results['img'].shape[0])
            results['gt_bboxes'] = gt_bboxes

    def _crop_keypoints(self, results: dict, bboxes: np.ndarray) -> None:
        """Update key points according to CenterCrop. Keypoints that are not
        in the cropped image will be set invisible.

        The third entry of each keypoint is zeroed when the shifted point
        falls outside the cropped image; x/y are then clipped to the image
        extent. Missing or None keypoints are skipped.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), location of cropped bboxes.
        """
        if results.get('gt_keypoints', None) is not None:
            offset_w = bboxes[0]
            offset_h = bboxes[1]
            # Third component is left unshifted.
            keypoints_offset = np.array([offset_w, offset_h, 0])

            gt_keypoints = results['gt_keypoints'] - keypoints_offset

            height, width = results['img'].shape[:2]
            valid_pos = (gt_keypoints[:, :, 0] >=
                         0) * (gt_keypoints[:, :, 0] <
                               width) * (gt_keypoints[:, :, 1] >= 0) * (
                                   gt_keypoints[:, :, 1] < height)
            gt_keypoints[:, :, 2] = np.where(valid_pos, gt_keypoints[:, :, 2],
                                             0)
            gt_keypoints[:, :, 0] = np.clip(gt_keypoints[:, :, 0], 0,
                                            results['img'].shape[1])
            gt_keypoints[:, :, 1] = np.clip(gt_keypoints[:, :, 1], 0,
                                            results['img'].shape[0])
            results['gt_keypoints'] = gt_keypoints

    def transform(self, results: dict) -> dict:
        """Apply center crop on results.

        If the requested crop is larger than the image, the image is either
        padded up to the crop size (``auto_pad=True``) or the crop is shrunk
        to the image size.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: Results with CenterCropped image and semantic segmentation
            map.
        """
        crop_width, crop_height = self.crop_size[0], self.crop_size[1]

        assert 'img' in results, '`img` is not found in results'
        img = results['img']

        img_height, img_width = img.shape[:2]

        if crop_height > img_height or crop_width > img_width:
            if self.auto_pad:
                # Grow only the deficient dimension(s) before cropping.
                img_height = max(img_height, crop_height)
                img_width = max(img_width, crop_width)
                pad_size = (img_width, img_height)
                _pad_cfg = self.pad_cfg.copy()
                _pad_cfg.update(dict(size=pad_size))
                pad_transform = TRANSFORMS.build(_pad_cfg)
                results = pad_transform(results)
            else:
                crop_height = min(crop_height, img_height)
                crop_width = min(crop_width, img_width)

        # Inclusive crop window [x1, y1, x2, y2] centred in the image.
        y1 = max(0, int(round((img_height - crop_height) / 2.)))
        x1 = max(0, int(round((img_width - crop_width) / 2.)))
        y2 = min(img_height, y1 + crop_height) - 1
        x2 = min(img_width, x1 + crop_width) - 1
        bboxes = np.array([x1, y1, x2, y2])

        # The image must be cropped first: the bbox/keypoint helpers clip
        # against the shape of the cropped ``results['img']``.
        self._crop_img(results, bboxes)
        self._crop_seg_map(results, bboxes)
        self._crop_bboxes(results, bboxes)
        self._crop_keypoints(results, bboxes)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(crop_size = {self.crop_size}'
        repr_str += f', auto_pad={self.auto_pad}'
        repr_str += f', pad_cfg={self.pad_cfg}'
        repr_str += f',clip_object_border = {self.clip_object_border})'
        return repr_str
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomGrayscale(BaseTransform):
    """Randomly convert image to grayscale with a probability.

    Required Key:

    - img

    Modified Key:

    - img

    Args:
        prob (float): Probability that image should be converted to
            grayscale. Defaults to 0.1.
        keep_channels (bool): Whether keep channel number the same as
            input. Defaults to False.
        channel_weights (tuple): The grayscale weights of each channel,
            and the weights will be normalized. For example, (1, 2, 1)
            will be normalized as (0.25, 0.5, 0.25). Defaults to
            (1., 1., 1.).
        color_format (str): Color format set to be any of 'bgr',
            'rgb', 'hsv'. Note: 'hsv' image will be transformed into 'bgr'
            format no matter whether it is grayscaled. Defaults to 'bgr'.

    Raises:
        AssertionError: If ``prob`` is outside [0, 1] or ``color_format`` is
            not one of the supported formats.
    """

    def __init__(self,
                 prob: float = 0.1,
                 keep_channels: bool = False,
                 channel_weights: Sequence[float] = (1., 1., 1.),
                 color_format: str = 'bgr') -> None:
        super().__init__()
        assert 0. <= prob <= 1., ('The range of ``prob`` value is [0., 1.],' +
                                  f' but got {prob} instead')
        self.prob = prob
        self.keep_channels = keep_channels
        self.channel_weights = channel_weights
        assert color_format in ['bgr', 'rgb', 'hsv']
        self.color_format = color_format

    @cache_randomness
    def _random_prob(self):
        """Draw the random number compared against ``self.prob``."""
        return random.random()

    def transform(self, results: dict) -> dict:
        """Apply random grayscale on results.

        The output image is always cast to ``uint8``. A 2-D input image
        gains a trailing channel axis unless it is converted to grayscale
        with more than one channel.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: Results with grayscale image.

        Raises:
            AssertionError: If the image has more than one channel and the
                number of channels differs from ``len(channel_weights)``.
        """
        img = results['img']

        if self.color_format == 'hsv':
            img = mmcv.hsv2bgr(img)
        img = img[..., None] if img.ndim == 2 else img
        num_output_channels = img.shape[2]
        if self._random_prob() < self.prob:
            if num_output_channels > 1:
                assert num_output_channels == len(self.channel_weights), (
                    'The length of ``channel_weights`` are supposed to be '
                    f'{num_output_channels}, but got '
                    f'{len(self.channel_weights)} instead.')
                normalized_weights = (
                    np.array(self.channel_weights) / sum(self.channel_weights))
                # Weighted sum over the channel axis collapses to (H, W).
                img = (normalized_weights * img).sum(axis=2)
                img = img.astype('uint8')
                if self.keep_channels:
                    # Replicate the gray plane back to the input channels.
                    img = img[:, :, None]
                    results['img'] = np.dstack(
                        [img for _ in range(num_output_channels)])
                else:
                    results['img'] = img
                return results
        # Not selected for conversion, or already single-channel.
        img = img.astype('uint8')
        results['img'] = img
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(prob = {self.prob}'
        repr_str += f', keep_channels = {self.keep_channels}'
        repr_str += f', channel_weights = {self.channel_weights}'
        repr_str += f', color_format = {self.color_format})'
        return repr_str
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class MultiScaleFlipAug(BaseTransform):
    """Test-time augmentation with multiple scales and flipping.

    An example configuration is as followed:

    .. code-block::

        dict(
            type='MultiScaleFlipAug',
            scales=[(1333, 400), (1333, 800)],
            allow_flip=True,
            transforms=[
                dict(type='Normalize', **img_norm_cfg),
                dict(type='Pad', size_divisor=1),
                dict(type='ImageToTensor', keys=['img']),
                dict(type='Collect', keys=['img'])
            ])

    ``results`` will be resized using all the sizes in ``scales``.
    If ``allow_flip`` is True, then flipped results will also be added into
    output list.

    For the above configuration, there are four combinations of resize
    and flip:

    - Resize to (1333, 400) + no flip
    - Resize to (1333, 400) + flip
    - Resize to (1333, 800) + no flip
    - resize to (1333, 800) + flip

    The four results are then transformed with ``transforms`` argument,
    whose output must contain the keys ``inputs`` and ``data_sample``.
    After that, results are wrapped into lists of the same length as below:

    .. code-block::

        dict(
            inputs=[...],
            data_sample=[...]
        )

    Where the length of ``inputs`` and ``data_sample`` are both 4.

    Required Keys:

    - Depending on the requirements of the ``transforms`` parameter.

    Modified Keys:

    - All output keys of each transform.

    Args:
        transforms (list[dict]): Transforms to be applied to each resized
            and flipped data.
        scales (tuple | list[tuple] | None): Images scales for resizing.
        scale_factor (float or tuple[float]): Scale factors for resizing.
            Only used when ``scales`` is None. Defaults to None, in which
            case a single factor of 1.0 is used.
        allow_flip (bool): Whether apply flip augmentation. Defaults to False.
        flip_direction (str | list[str]): Flip augmentation directions,
            options are "horizontal", "vertical" and "diagonal". If
            flip_direction is a list, multiple flip augmentations will be
            applied. It has no effect when ``allow_flip == False``. Defaults
            to "horizontal".
        resize_cfg (dict): Base config for resizing. Defaults to
            ``dict(type='Resize', keep_ratio=True)``.
        flip_cfg (dict): Base config for flipping. Defaults to
            ``dict(type='RandomFlip')``.
    """

    def __init__(
        self,
        transforms: List[dict],
        scales: Optional[Union[Tuple, List[Tuple]]] = None,
        scale_factor: Optional[Union[float, List[float]]] = None,
        allow_flip: bool = False,
        flip_direction: Union[str, List[str]] = 'horizontal',
        resize_cfg: dict = dict(type='Resize', keep_ratio=True),
        flip_cfg: dict = dict(type='RandomFlip')
    ) -> None:
        super().__init__()
        self.transforms = Compose(transforms)

        if scales is not None:
            self.scales = scales if isinstance(scales, list) else [scales]
            self.scale_key = 'scale'
            assert mmengine.is_list_of(self.scales, tuple)
        else:
            # No absolute scales: fall back to relative scale factors.
            if scale_factor is None:
                self.scales = [1.]
            elif isinstance(scale_factor, list):
                self.scales = scale_factor
            else:
                self.scales = [scale_factor]

            self.scale_key = 'scale_factor'

        self.allow_flip = allow_flip
        self.flip_direction = flip_direction if isinstance(
            flip_direction, list) else [flip_direction]
        assert mmengine.is_list_of(self.flip_direction, str)
        if not self.allow_flip and self.flip_direction != ['horizontal']:
            warnings.warn(
                'flip_direction has no effect when allow_flip is set to '
                'False')
        # Copy both base configs so the shared default dicts (and any dict
        # supplied by the caller) are never aliased by this instance.
        self.resize_cfg = resize_cfg.copy()
        self.flip_cfg = flip_cfg.copy()

    def transform(self, results: dict) -> Dict:
        """Apply test time augment transforms on results.

        For every scale and every flip setting a resize(+flip) pipeline is
        built and applied to a shallow copy of ``results``, followed by
        ``self.transforms``. Note that ``flip`` and ``flip_direction`` are
        written into the input ``results`` for the non-flipped variant.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: The augmented data with keys ``inputs`` and
            ``data_sample``, where each value is a list with one entry per
            scale/flip combination.
        """
        data_samples = []
        inputs = []
        # The un-flipped variant always comes first.
        flip_args = [(False, '')]
        if self.allow_flip:
            flip_args += [(True, direction)
                          for direction in self.flip_direction]
        for scale in self.scales:
            for flip, direction in flip_args:
                _resize_cfg = self.resize_cfg.copy()
                _resize_cfg.update({self.scale_key: scale})
                _resize_flip = [_resize_cfg]

                if flip:
                    # Force the flip to happen in the requested direction.
                    _flip_cfg = self.flip_cfg.copy()
                    _flip_cfg.update(prob=1.0, direction=direction)
                    _resize_flip.append(_flip_cfg)
                else:
                    results['flip'] = False
                    results['flip_direction'] = None

                resize_flip = Compose(_resize_flip)
                _results = resize_flip(results.copy())
                packed_results = self.transforms(_results)

                inputs.append(packed_results['inputs'])
                data_samples.append(
                    packed_results['data_sample'])
        return dict(inputs=inputs, data_sample=data_samples)

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(transforms={self.transforms}'
        repr_str += f', scales={self.scales}'
        repr_str += f', allow_flip={self.allow_flip}'
        repr_str += f', flip_direction={self.flip_direction})'
        return repr_str
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module() |
|
|
class TestTimeAug(BaseTransform): |
|
|
"""Test-time augmentation transform. |
|
|
|
|
|
An example configuration is as followed: |
|
|
|
|
|
.. code-block:: |
|
|
|
|
|
dict(type='TestTimeAug', |
|
|
transforms=[ |
|
|
[dict(type='Resize', scale=(1333, 400), keep_ratio=True), |
|
|
dict(type='Resize', scale=(1333, 800), keep_ratio=True)], |
|
|
[dict(type='RandomFlip', prob=1.), |
|
|
dict(type='RandomFlip', prob=0.)], |
|
|
[dict(type='PackDetInputs', |
|
|
meta_keys=('img_id', 'img_path', 'ori_shape', |
|
|
'img_shape', 'scale_factor', 'flip', |
|
|
'flip_direction'))]]) |
|
|
|
|
|
``results`` will be transformed using all transforms defined in |
|
|
``transforms`` arguments. |
|
|
|
|
|
For the above configuration, there are four combinations of resize |
|
|
and flip: |
|
|
|
|
|
- Resize to (1333, 400) + no flip |
|
|
- Resize to (1333, 400) + flip |
|
|
- Resize to (1333, 800) + no flip |
|
|
- resize to (1333, 800) + flip |
|
|
|
|
|
After that, results are wrapped into lists of the same length as below: |
|
|
|
|
|
.. code-block:: |
|
|
|
|
|
dict( |
|
|
inputs=[...], |
|
|
data_samples=[...] |
|
|
) |
|
|
|
|
|
The length of ``inputs`` and ``data_samples`` are both 4. |
|
|
|
|
|
Required Keys: |
|
|
|
|
|
- Depending on the requirements of the ``transforms`` parameter. |
|
|
|
|
|
Modified Keys: |
|
|
|
|
|
- All output keys of each transform. |
|
|
|
|
|
Args: |
|
|
transforms (list[list[dict]]): Transforms to be applied to data sampled |
|
|
from dataset. ``transforms`` is a list of list, and each list |
|
|
element usually represents a series of transforms with the same |
|
|
type and different arguments. Data will be processed by each list |
|
|
elements sequentially. See more information in :meth:`transform`. |
|
|
""" |
|
|
|
|
|
def __init__(self, transforms: list): |
|
|
for i, transform_list in enumerate(transforms): |
|
|
for j, transform in enumerate(transform_list): |
|
|
if isinstance(transform, dict): |
|
|
transform_list[j] = TRANSFORMS.build(transform) |
|
|
elif callable(transform): |
|
|
continue |
|
|
else: |
|
|
raise TypeError( |
|
|
'transform must be callable or a dict, but got' |
|
|
f' {type(transform)}') |
|
|
transforms[i] = transform_list |
|
|
|
|
|
self.subroutines = [ |
|
|
Compose(subroutine) for subroutine in product(*transforms) |
|
|
] |
|
|
|
|
|
def transform(self, results: dict) -> dict:
    """Apply every composed pipeline to ``results`` and merge the outputs.

    As the example given in :obj:`TestTimeAug`, ``transforms`` consists of
    2 ``Resize``, 2 ``RandomFlip`` and 1 ``PackDetInputs``.
    The data sampled from dataset will be processed as follows:

    1. Data will be processed by 2 ``Resize`` and return a list
       of 2 results.
    2. Each result in list will be further passed to 2
       ``RandomFlip``, and aggregates into a list of 4 results.
    3. Each result will be processed by ``PackDetInputs``, and
       return a list of dict.
    4. Aggregates the same fields of results, and finally returns
       a dict. Each value of the dict represents 4 transformed
       results.

    Args:
        results (dict): Result dict contains the data to transform. It is
            deep-copied for every pipeline and never modified itself.

    Returns:
        dict: The augmented data, where each value is wrapped
        into a list. The keys are those of the first pipeline's output.

    Raises:
        AssertionError: If a pipeline returns ``None`` or anything that
            is not a dict.
    """
    results_list = []
    for subroutine in self.subroutines:
        # Every pipeline gets its own deep copy so that it never sees the
        # modifications made by another pipeline.
        result = subroutine(copy.deepcopy(results))
        # ``None`` must be checked first: it would also fail the
        # ``isinstance`` check below, hiding this more helpful message.
        assert result is not None, (
            f'Data processed by {subroutine} in `TestTimeAug` should not '
            'be None! Please check your validation dataset and the '
            f'transforms in {subroutine}')
        assert isinstance(result, dict), (
            f'Data processed by {subroutine} must return a dict, but got '
            f'{result}')
        results_list.append(result)

    # Turn the list of dicts into a dict of lists, one entry per pipeline.
    aug_data_dict = {
        key: [item[key] for item in results_list]
        for key in results_list[0]
    }
    return aug_data_dict
|
|
|
|
|
def __repr__(self) -> str: |
|
|
repr_str = self.__class__.__name__ |
|
|
repr_str += 'transforms=\n' |
|
|
for subroutine in self.subroutines: |
|
|
repr_str += f'{repr(subroutine)}\n' |
|
|
return repr_str |
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomChoiceResize(BaseTransform):
    """Resize images & bbox & mask to a scale picked from several candidates.

    On every call one entry of ``scales`` is drawn at random, assigned to the
    wrapped resize transform and that transform is applied to the results.
    The resizing itself (image, bboxes, masks, ...) is done entirely by the
    wrapped transform.

    How the target scale is chosen:

    - if ``scales`` is a list, the target scale is sampled uniformly from
      the list.
    - otherwise (e.g. a single tuple) ``scales`` is the only candidate and
      is always used.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - img_shape
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Added Keys:

    - scale
    - scale_factor
    - scale_idx
    - keep_ratio

    Args:
        scales (Union[list, Tuple]): Images scales for resizing.
        resize_type (str): The type of resize class to use. Defaults to
            "Resize".
        **resize_kwargs: Other keyword arguments for the ``resize_type``.

    Note:
        By defaults, the ``resize_type`` is "Resize", if it's not overwritten
        by your registry, it indicates the :class:`mmcv.Resize`. And
        therefore, ``resize_kwargs`` accepts any keyword arguments of it,
        like ``keep_ratio``, ``interpolation`` and so on.

        If you want to use your custom resize class, the class should accept
        a ``scale`` argument and have a ``scale`` attribute which determines
        the resize shape. Apart from ``scale_idx``, the keys listed above
        are produced by the wrapped resize transform.
    """

    def __init__(
        self,
        scales: Sequence[Union[int, Tuple]],
        resize_type: str = 'Resize',
        **resize_kwargs,
    ) -> None:
        super().__init__()
        # Anything that is not a list counts as one single candidate.
        self.scales = scales if isinstance(scales, list) else [scales]
        assert mmengine.is_seq_of(self.scales, (tuple, int))

        self.resize_cfg = dict(type=resize_type, **resize_kwargs)
        # The resize transform is built once with a placeholder scale; the
        # actual scale is assigned on every call of ``transform``.
        build_cfg = {'scale': 0}
        build_cfg.update(self.resize_cfg)
        self.resize = TRANSFORMS.build(build_cfg)

    @cache_randomness
    def _random_select(self) -> Tuple[int, int]:
        """Randomly select a scale from the given candidates.

        Returns:
            (tuple, int): Returns a tuple ``(scale, scale_idx)``,
            where ``scale`` is the selected image scale and
            ``scale_idx`` is the selected index in the given candidates.
        """
        picked_idx = np.random.randint(len(self.scales))
        return self.scales[picked_idx], picked_idx

    def transform(self, results: dict) -> dict:
        """Resize the results to a randomly selected candidate scale.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: The output of the wrapped resize transform with the index
            of the selected scale stored additionally under ``scale_idx``.
        """
        chosen_scale, chosen_idx = self._random_select()
        self.resize.scale = chosen_scale
        resized = self.resize(results)
        resized['scale_idx'] = chosen_idx
        return resized

    def __repr__(self) -> str:
        """Return the class name with the candidate scales and resize cfg."""
        return (f'{self.__class__.__name__}(scales={self.scales}'
                f', resize_cfg={self.resize_cfg})')
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomFlip(BaseTransform):
    """Flip the image & bbox & keypoints & segmentation map.

    Added or updated keys: flip, flip_direction, img, gt_bboxes, gt_seg_map,
    and gt_keypoints. There are 3 flip modes:

    - ``prob`` is float, ``direction`` is string: the image will be
      ``direction``ly flipped with probability of ``prob`` .
      E.g., ``prob=0.5``, ``direction='horizontal'``,
      then image will be horizontally flipped with probability of 0.5.

    - ``prob`` is float, ``direction`` is list of string: the image will
      be ``direction[i]``ly flipped with probability of
      ``prob/len(direction)``.
      E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``,
      then image will be horizontally flipped with probability of 0.25,
      vertically with probability of 0.25.

    - ``prob`` is list of float, ``direction`` is list of string:
      given ``len(prob) == len(direction)``, the image will
      be ``direction[i]``ly flipped with probability of ``prob[i]``.
      E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal',
      'vertical']``, then image will be horizontally flipped with
      probability of 0.3, vertically with probability of 0.5.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Added Keys:

    - flip
    - flip_direction
    - swap_seg_labels (optional)

    Args:
        prob (float | list[float], optional): The flipping probability.
            A float or a list of floats has to be passed; the default
            ``None`` (like any other type) raises ``ValueError``.
        direction (str | list[str]): The flipping direction. Options are
            'horizontal', 'vertical' and 'diagonal'. If ``prob`` is a list,
            ``direction`` must be a list of the same length and each
            element in ``prob`` indicates the flip probability of the
            corresponding direction. Defaults to 'horizontal'.
        swap_seg_labels (list, optional): The label pair need to be swapped
            for ground truth, like 'left arm' and 'right arm' need to be
            swapped after horizontal flipping. For example, ``[(1, 5)]``,
            where 1/5 is the label of the left/right arm. Defaults to None.

    Raises:
        ValueError: If ``prob`` is neither a float nor a list, or if
            ``direction`` is neither a str nor a list.
        AssertionError: If the values of ``prob`` or ``direction`` are
            invalid or their lengths do not match.
    """

    def __init__(self,
                 prob: Optional[Union[float, Iterable[float]]] = None,
                 direction: Union[str,
                                  Sequence[Optional[str]]] = 'horizontal',
                 swap_seg_labels: Optional[Sequence] = None) -> None:
        if isinstance(prob, list):
            assert mmengine.is_list_of(prob, float)
            assert 0 <= sum(prob) <= 1
        elif isinstance(prob, float):
            assert 0 <= prob <= 1
        else:
            # Implicit string concatenation keeps the source indentation
            # out of the message (a backslash continuation would not).
            raise ValueError('probs must be float or list of float, but '
                             f'got `{type(prob)}`.')
        self.prob = prob
        self.swap_seg_labels = swap_seg_labels

        valid_directions = ['horizontal', 'vertical', 'diagonal']
        if isinstance(direction, str):
            assert direction in valid_directions
        elif isinstance(direction, list):
            assert mmengine.is_list_of(direction, str)
            assert set(direction).issubset(set(valid_directions))
        else:
            raise ValueError('direction must be either str or list of str, '
                             f'but got `{type(direction)}`.')
        self.direction = direction

        if isinstance(prob, list):
            # A list of probabilities needs one direction per entry. The
            # list check prevents comparing against the number of characters
            # of a single direction string.
            assert isinstance(self.direction, list)
            assert len(prob) == len(self.direction)

    def _flip_bbox(self, bboxes: np.ndarray, img_shape: Tuple[int, int],
                   direction: str) -> np.ndarray:
        """Flip bboxes horizontally, vertically or diagonally.

        Args:
            bboxes (numpy.ndarray): Bounding boxes, shape (..., 4*k)
            img_shape (tuple[int]): Image shape (height, width)
            direction (str): Flip direction. Options are 'horizontal',
                'vertical', and 'diagonal'.

        Returns:
            numpy.ndarray: Flipped bounding boxes (a new array).

        Raises:
            ValueError: If ``direction`` is not one of the options.
        """
        assert bboxes.shape[-1] % 4 == 0
        flipped = bboxes.copy()
        h, w = img_shape
        # Mirroring swaps the roles of the two corners: the new first
        # coordinate is computed from the old second one and vice versa.
        if direction == 'horizontal':
            flipped[..., 0::4] = w - bboxes[..., 2::4]
            flipped[..., 2::4] = w - bboxes[..., 0::4]
        elif direction == 'vertical':
            flipped[..., 1::4] = h - bboxes[..., 3::4]
            flipped[..., 3::4] = h - bboxes[..., 1::4]
        elif direction == 'diagonal':
            flipped[..., 0::4] = w - bboxes[..., 2::4]
            flipped[..., 1::4] = h - bboxes[..., 3::4]
            flipped[..., 2::4] = w - bboxes[..., 0::4]
            flipped[..., 3::4] = h - bboxes[..., 1::4]
        else:
            raise ValueError(
                "Flipping direction must be 'horizontal', 'vertical', "
                f"or 'diagonal', but got '{direction}'")
        return flipped

    def _flip_keypoints(
        self,
        keypoints: np.ndarray,
        img_shape: Tuple[int, int],
        direction: str,
    ) -> np.ndarray:
        """Flip keypoints horizontally, vertically or diagonally.

        Args:
            keypoints (numpy.ndarray): Keypoints whose last axis starts with
                the x and y coordinate. Any further entries of the last
                axis are carried over unchanged.
            img_shape (tuple[int]): Image shape (height, width)
            direction (str): Flip direction. Options are 'horizontal',
                'vertical', and 'diagonal'.

        Returns:
            numpy.ndarray: Flipped keypoints (a new array).

        Raises:
            ValueError: If ``direction`` is not one of the options.
        """
        # Only the first two entries are coordinates; the rest is kept and
        # appended again after flipping.
        meta_info = keypoints[..., 2:]
        keypoints = keypoints[..., :2]
        flipped = keypoints.copy()
        h, w = img_shape
        if direction == 'horizontal':
            flipped[..., 0::2] = w - keypoints[..., 0::2]
        elif direction == 'vertical':
            flipped[..., 1::2] = h - keypoints[..., 1::2]
        elif direction == 'diagonal':
            flipped[..., 0::2] = w - keypoints[..., 0::2]
            flipped[..., 1::2] = h - keypoints[..., 1::2]
        else:
            raise ValueError(
                "Flipping direction must be 'horizontal', 'vertical', "
                f"or 'diagonal', but got '{direction}'")
        flipped = np.concatenate([flipped, meta_info], axis=-1)
        return flipped

    def _flip_seg_map(self, seg_map: np.ndarray,
                      direction: str) -> np.ndarray:
        """Flip segmentation map and swap the configured label pairs.

        Args:
            seg_map (numpy.ndarray): segmentation map, shape (H, W).
            direction (str): Flip direction, forwarded to ``mmcv.imflip``.

        Returns:
            numpy.ndarray: Flipped segmentation map.
        """
        seg_map = mmcv.imflip(seg_map, direction=direction)
        if self.swap_seg_labels is not None:
            # Labels such as "left arm" / "right arm" have to trade places
            # after flipping. The masks are taken from an untouched copy so
            # that pixels relabelled by the first assignment are not
            # swapped back by the second one.
            temp = seg_map.copy()
            assert isinstance(self.swap_seg_labels, (tuple, list))
            for pair in self.swap_seg_labels:
                assert isinstance(pair, (tuple, list)) and len(pair) == 2, \
                    'swap_seg_labels must be a sequence with pair, but got ' \
                    f'{self.swap_seg_labels}.'
                seg_map[temp == pair[0]] = pair[1]
                seg_map[temp == pair[1]] = pair[0]
        return seg_map

    @cache_randomness
    def _choose_direction(self) -> Optional[str]:
        """Choose the flip direction according to `prob` and `direction`.

        Returns:
            str | None: The sampled direction, or ``None`` if the image
            should not be flipped.
        """
        # ``None`` is appended as the "do not flip" outcome.
        if isinstance(self.direction,
                      Sequence) and not isinstance(self.direction, str):
            direction_list: list = list(self.direction) + [None]
        elif isinstance(self.direction, str):
            direction_list = [self.direction, None]

        if isinstance(self.prob, list):
            # One probability per direction, the remainder means no flip.
            non_prob: float = 1 - sum(self.prob)
            prob_list = self.prob + [non_prob]
        elif isinstance(self.prob, float):
            non_prob = 1. - self.prob
            # A single probability is shared equally by all directions.
            single_ratio = self.prob / (len(direction_list) - 1)
            prob_list = [single_ratio] * (len(direction_list) - 1) + [non_prob]

        cur_dir = np.random.choice(direction_list, p=prob_list)

        return cur_dir

    def _flip(self, results: dict) -> None:
        """Flip images, bounding boxes, semantic segmentation map and
        keypoints.

        The direction is read from ``results['flip_direction']`` and
        ``results`` is updated in place.
        """
        # flip image
        results['img'] = mmcv.imflip(
            results['img'], direction=results['flip_direction'])

        img_shape = results['img'].shape[:2]

        # flip bboxes
        if results.get('gt_bboxes', None) is not None:
            results['gt_bboxes'] = self._flip_bbox(results['gt_bboxes'],
                                                   img_shape,
                                                   results['flip_direction'])

        # flip keypoints
        if results.get('gt_keypoints', None) is not None:
            results['gt_keypoints'] = self._flip_keypoints(
                results['gt_keypoints'], img_shape, results['flip_direction'])

        # flip seg map
        if results.get('gt_seg_map', None) is not None:
            results['gt_seg_map'] = self._flip_seg_map(
                results['gt_seg_map'], direction=results['flip_direction'])
            results['swap_seg_labels'] = self.swap_seg_labels

    def _flip_on_direction(self, results: dict) -> None:
        """Function to flip images, bounding boxes, semantic segmentation map
        and keypoints.

        Samples a direction, records the decision under ``flip`` and
        ``flip_direction`` and flips the data unless ``None`` was sampled.
        """
        cur_dir = self._choose_direction()
        if cur_dir is None:
            results['flip'] = False
            results['flip_direction'] = None
        else:
            results['flip'] = True
            results['flip_direction'] = cur_dir
            self._flip(results)

    def transform(self, results: dict) -> dict:
        """Transform function to flip images, bounding boxes, semantic
        segmentation map and keypoints.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Flipped results, 'img', 'gt_bboxes', 'gt_seg_map',
            'gt_keypoints', 'flip', and 'flip_direction' keys are
            updated in result dict.
        """
        self._flip_on_direction(results)

        return results

    def __repr__(self) -> str:
        """Return the class name with the configured prob and direction."""
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob}, '
        repr_str += f'direction={self.direction})'

        return repr_str
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomResize(BaseTransform):
    """Random resize images & bbox & keypoints.

    How to choose the target scale to resize the image will follow the rules
    below:

    - if ``scale`` is a sequence of two tuples, both entries of the target
      scale are sampled independently as integers, bounds included

      .. math::
          target\\_scale[0] \\sim Uniform([scale[0][0], scale[1][0]])
      .. math::
          target\\_scale[1] \\sim Uniform([scale[0][1], scale[1][1]])

    - if ``scale`` is a tuple of int, one single ratio is sampled and
      applied to both entries, which are then truncated to int

      .. math::
          ratio \\sim Uniform([ratio\\_range[0], ratio\\_range[1]))
      .. math::
          target\\_scale[i] = int(ratio * scale[i])

    The sampled scale is stored in ``results['scale']`` and assigned to the
    wrapped resize transform, which performs the actual resizing.

    - if ``keep_ratio`` is True, the minimum value of ``target_scale`` will be
      used to set the shorter side and the maximum value will be used to
      set the longer side.

    - if ``keep_ratio`` is False, the value of ``target_scale`` will be used to
      resize the width and height accordingly.

    Required Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints

    Modified Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints
    - img_shape

    Added Keys:

    - scale
    - scale_factor
    - keep_ratio

    Args:
        scale (tuple or Sequence[tuple]): Images scales for resizing.
            Either a tuple of two ints, which requires ``ratio_range``, or
            a sequence of two tuples giving the bounds of the scale.
        ratio_range (tuple[float], optional): (min_ratio, max_ratio).
            Only used, and then required, if ``scale`` is a tuple of int.
            Defaults to None.
        resize_type (str): The type of resize class to use. Defaults to
            "Resize".
        **resize_kwargs: Other keyword arguments for the ``resize_type``.

    Note:
        By defaults, the ``resize_type`` is "Resize", if it's not overwritten
        by your registry, it indicates the :class:`mmcv.Resize`. And
        therefore, ``resize_kwargs`` accepts any keyword arguments of it,
        like ``keep_ratio``, ``interpolation`` and so on.

        If you want to use your custom resize class, the class should accept
        a ``scale`` argument and have a ``scale`` attribute which determines
        the resize shape. The ``keep_ratio`` rules and all keys other than
        ``scale`` describe the wrapped resize transform, not this class.
    """

    def __init__(
        self,
        scale: Union[Tuple[int, int], Sequence[Tuple[int, int]]],
        ratio_range: Optional[Tuple[float, float]] = None,
        resize_type: str = 'Resize',
        **resize_kwargs,
    ) -> None:

        self.scale = scale
        self.ratio_range = ratio_range

        self.resize_cfg = dict(type=resize_type, **resize_kwargs)
        # The resize transform is built once with a placeholder scale; the
        # sampled scale is assigned on every call of ``transform``.
        self.resize = TRANSFORMS.build({'scale': 0, **self.resize_cfg})

    @staticmethod
    def _random_sample(scales: Sequence[Tuple[int, int]]) -> tuple:
        """Private function to randomly sample a scale from two tuples.

        Args:
            scales (Sequence[tuple]): Images scale range for sampling.
                There must be two tuples in scales, which specify the lower
                and upper bound of image scales.

        Returns:
            tuple: The targeted scale of the image to be resized.
        """
        # Any sequence of tuples is accepted, matching the dispatch in
        # ``_random_scale``; a list-only check would reject tuple input.
        assert mmengine.is_seq_of(scales, tuple) and len(scales) == 2
        scale_0 = [scales[0][0], scales[1][0]]
        scale_1 = [scales[0][1], scales[1][1]]
        # ``randint`` excludes its upper bound, hence the ``+ 1``.
        edge_0 = np.random.randint(min(scale_0), max(scale_0) + 1)
        edge_1 = np.random.randint(min(scale_1), max(scale_1) + 1)
        scale = (edge_0, edge_1)
        return scale

    @staticmethod
    def _random_sample_ratio(scale: tuple, ratio_range: Tuple[float,
                                                              float]) -> tuple:
        """Private function to randomly sample a scale from a tuple.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. Then it would be multiplied with ``scale`` to
        generate sampled scale.

        Args:
            scale (tuple): Images scale base to multiply with ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to scale
                the ``scale``.

        Returns:
            tuple: The targeted scale of the image to be resized.
        """
        assert isinstance(scale, tuple) and len(scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        # The same ratio is applied to both entries of ``scale``.
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(scale[0] * ratio), int(scale[1] * ratio)
        return scale

    @cache_randomness
    def _random_scale(self) -> tuple:
        """Private function to randomly sample a scale according to the type
        of ``scale``.

        Returns:
            tuple: The targeted scale of the image to be resized.

        Raises:
            NotImplementedError: If ``scale`` is neither a tuple of int nor
                a sequence of tuples.
        """
        if mmengine.is_tuple_of(self.scale, int):
            assert self.ratio_range is not None and len(self.ratio_range) == 2
            scale = self._random_sample_ratio(
                self.scale,  # type: ignore
                self.ratio_range)
        elif mmengine.is_seq_of(self.scale, tuple):
            scale = self._random_sample(self.scale)  # type: ignore
        else:
            raise NotImplementedError('Do not support sampling function '
                                      f'for "{self.scale}"')

        return scale

    def transform(self, results: dict) -> dict:
        """Transform function to resize images, bounding boxes, semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, ``img``, ``gt_bboxes``, ``gt_seg_map``,
            ``gt_keypoints``, ``scale``, ``scale_factor``, ``img_shape``, and
            ``keep_ratio`` keys are updated in result dict.
        """
        results['scale'] = self._random_scale()
        self.resize.scale = results['scale']
        results = self.resize(results)
        return results

    def __repr__(self) -> str:
        """Return the class name with scale, ratio range and resize cfg."""
        repr_str = self.__class__.__name__
        repr_str += f'(scale={self.scale}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'resize_cfg={self.resize_cfg})'
        return repr_str
|
|
|