# Copyright (c) OpenMMLab. All rights reserved.
from functools import partial
from typing import Dict, List, Optional, Sequence, Tuple, Union

import cv2
import numpy as np
import xtcocotools.mask as cocomask
from mmcv.image import imflip_, imresize
from mmcv.image.geometric import imrescale
from mmcv.transforms import BaseTransform
from mmcv.transforms.utils import cache_randomness
from scipy.stats import truncnorm

from mmpose.registry import TRANSFORMS
from mmpose.structures.bbox import (bbox_clip_border, bbox_corner2xyxy,
                                    bbox_xyxy2corner, get_pers_warp_matrix,
                                    get_udp_warp_matrix, get_warp_matrix)
from mmpose.structures.keypoint import keypoint_clip_border


@TRANSFORMS.register_module()
class BottomupGetHeatmapMask(BaseTransform):
    """Generate the mask of valid regions from the segmentation annotation.

    Required Keys:

        - img_shape
        - input_size
        - invalid_segs (optional)
        - warp_mat (optional)
        - flip (optional)
        - flip_direction (optional)
        - heatmaps (optional)

    Added Keys:

        - heatmap_mask
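
    Example (a minimal usage sketch; the invalid segmentation polygon is
    illustrative, not from a real annotation)::

        >>> import numpy as np
        >>> transform = BottomupGetHeatmapMask()
        >>> results = dict(
        ...     img_shape=(64, 64),
        ...     input_size=(64, 64),
        ...     invalid_segs=[[[10., 10., 40., 10., 40., 40., 10., 40.]]])
        >>> results = transform.transform(results)
        >>> results['heatmap_mask'].shape  # valid-region mask in (h, w)
        (64, 64)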
| """ | |
| def __init__(self, get_invalid: bool = False): | |
| super().__init__() | |
| self.get_invalid = get_invalid | |

    def _segs_to_mask(self, segs: list,
                      img_shape: Tuple[int, int]) -> np.ndarray:
        """Calculate mask from object segmentations.

        Args:
            segs (List): The object segmentation annotations in COCO format
            img_shape (Tuple): The image shape in (h, w)

        Returns:
            np.ndarray: The binary object mask in size (h, w), where the
            object pixels are 1 and background pixels are 0
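
        Example (an illustrative square polygon)::

            >>> transform = BottomupGetHeatmapMask()
            >>> mask = transform._segs_to_mask(
            ...     [[[2., 2., 6., 2., 6., 6., 2., 6.]]], (8, 8))
            >>> mask.shape
            (8, 8)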
| """ | |
| # RLE is a simple yet efficient format for storing binary masks. | |
| # details can be found at `COCO tools <https://github.com/ | |
| # cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/ | |
| # mask.py>`__ | |
| rles = [] | |
| for seg in segs: | |
| if isinstance(seg, (tuple, list)): | |
| rle = cocomask.frPyObjects(seg, img_shape[0], img_shape[1]) | |
| if isinstance(rle, list): | |
| # For non-crowded objects (e.g. human with no visible | |
| # keypoints), the results is a list of rles | |
| rles.extend(rle) | |
| else: | |
| # For crowded objects, the result is a single rle | |
| rles.append(rle) | |
| if rles: | |
| mask = cocomask.decode(cocomask.merge(rles)) | |
| else: | |
| mask = np.zeros(img_shape, dtype=np.uint8) | |
| return mask | |

    def transform(self, results: Dict) -> Optional[dict]:
        """The transform function of :class:`BottomupGetHeatmapMask` to
        generate the heatmap mask from segmentation annotations.

        See ``transform()`` method of :class:`BaseTransform` for details.

        Args:
            results (dict): Result dict from the data pipeline.

        Returns:
            dict: Result dict with the heatmap mask added.
        """
        invalid_segs = results.get('invalid_segs', [])
        img_shape = results['img_shape']  # (img_h, img_w)
        input_size = results['input_size']

        # Calculate the mask of invalid regions from the segmentations
        mask = self._segs_to_mask(invalid_segs, img_shape)

        if not self.get_invalid:
            # Calculate the mask of the valid region by negating the
            # segmentation mask of invalid objects
            mask = np.logical_not(mask)

        # Apply an affine transform to the mask if the image has been
        # transformed
        if 'warp_mat' in results:
            warp_mat = results['warp_mat']

            mask = mask.astype(np.float32)
            mask = cv2.warpAffine(
                mask, warp_mat, input_size, flags=cv2.INTER_LINEAR)

        # Flip the mask if the image has been flipped
        if results.get('flip', False):
            flip_dir = results['flip_direction']
            if flip_dir is not None:
                mask = imflip_(mask, flip_dir)

        # Resize the mask to the same size as the heatmaps
        if 'heatmaps' in results:
            heatmaps = results['heatmaps']
            if isinstance(heatmaps, list):
                # Multi-level heatmaps
                heatmap_mask = []
                for hm in results['heatmaps']:
                    h, w = hm.shape[1:3]
                    _mask = imresize(
                        mask, size=(w, h), interpolation='bilinear')
                    heatmap_mask.append(_mask)
            else:
                h, w = heatmaps.shape[1:3]
                heatmap_mask = imresize(
                    mask, size=(w, h), interpolation='bilinear')
        else:
            heatmap_mask = mask

        # Binarize the mask(s)
        if isinstance(heatmap_mask, list):
            results['heatmap_mask'] = [hm > 0.5 for hm in heatmap_mask]
        else:
            results['heatmap_mask'] = heatmap_mask > 0.5

        return results


@TRANSFORMS.register_module()
class BottomupRandomAffine(BaseTransform):
    r"""Randomly shift, resize and rotate the image.

    Required Keys:

        - img
        - img_shape
        - keypoints (optional)

    Modified Keys:

        - img
        - keypoints (optional)

    Added Keys:

        - input_size
        - warp_mat

    Args:
        input_size (Tuple[int, int]): The input image size of the model in
            [w, h]
        shift_factor (float): Randomly shift the image in range
            :math:`[-dx, dx]` and :math:`[-dy, dy]` in X and Y directions,
            where :math:`dx(y) = img_w(h) \cdot shift_factor` in pixels.
            Defaults to 0.2
        shift_prob (float): Probability of applying random shift. Defaults
            to 1.0
        scale_factor (Tuple[float, float]): Randomly resize the image in
            range :math:`[scale_factor[0], scale_factor[1]]`. Defaults to
            (0.75, 1.5)
        scale_prob (float): Probability of applying random resizing.
            Defaults to 1.0
        scale_type (str): wrt ``long`` or ``short`` length of the image.
            Defaults to ``short``
        rotate_factor (float): Randomly rotate the bbox in
            :math:`[-rotate_factor, rotate_factor]` in degrees. Defaults
            to 30.0
        rotate_prob (float): Probability of applying random rotation.
            Defaults to 1.0
        shear_factor (float): Randomly shear the image in range
            :math:`[-shear_factor, shear_factor]` in degrees, only used
            when ``transform_mode`` is ``'perspective'``. Defaults to 2.0
        shear_prob (float): Probability of applying random shear. Defaults
            to 1.0
        use_udp (bool): Whether use unbiased data processing. See
            `UDP (CVPR 2020)`_ for details. Defaults to ``False``
        pad_val (float | Tuple[float]): Padding value for the borders of the
            warped image. Defaults to 0
        distribution (str): The distribution to sample the random transform
            parameters from, either ``'trunc_norm'`` or ``'uniform'``.
            Defaults to ``'trunc_norm'``
        transform_mode (str): The type of the applied transform, one of
            ``'affine'``, ``'affine_udp'`` and ``'perspective'``. Defaults
            to ``'affine'``
        bbox_keep_corner (bool): Whether to keep the bbox as 4 corner points
            after the transform, instead of converting it back to the xyxy
            format. Defaults to ``True``
        clip_border (bool): Whether to clip the transformed keypoints and
            bboxes to the image border. Defaults to ``False``

    .. _`UDP (CVPR 2020)`: https://arxiv.org/abs/1911.07524
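
    Example (a minimal usage sketch with illustrative shapes)::

        >>> import numpy as np
        >>> transform = BottomupRandomAffine(input_size=(192, 256))
        >>> results = dict(
        ...     img=np.zeros((480, 640, 3), dtype=np.uint8),
        ...     img_shape=(480, 640),
        ...     keypoints=np.random.rand(1, 17, 2) * [640, 480],
        ...     keypoints_visible=np.ones((1, 17), dtype=np.float32))
        >>> results = transform.transform(results)
        >>> results['img'].shape  # resized to input_size, in (h, w, c)
        (256, 192, 3)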
| """ | |
| def __init__(self, | |
| input_size: Optional[Tuple[int, int]] = None, | |
| shift_factor: float = 0.2, | |
| shift_prob: float = 1., | |
| scale_factor: Tuple[float, float] = (0.75, 1.5), | |
| scale_prob: float = 1., | |
| scale_type: str = 'short', | |
| rotate_factor: float = 30., | |
| rotate_prob: float = 1, | |
| shear_factor: float = 2.0, | |
| shear_prob: float = 1.0, | |
| use_udp: bool = False, | |
| pad_val: Union[float, Tuple[float]] = 0, | |
| border: Tuple[int, int] = (0, 0), | |
| distribution='trunc_norm', | |
| transform_mode='affine', | |
| bbox_keep_corner: bool = True, | |
| clip_border: bool = False) -> None: | |
| super().__init__() | |
| assert transform_mode in ('affine', 'affine_udp', 'perspective'), \ | |
| f'the argument transform_mode should be either \'affine\', ' \ | |
| f'\'affine_udp\' or \'perspective\', but got \'{transform_mode}\'' | |
| self.input_size = input_size | |
| self.shift_factor = shift_factor | |
| self.shift_prob = shift_prob | |
| self.scale_factor = scale_factor | |
| self.scale_prob = scale_prob | |
| self.scale_type = scale_type | |
| self.rotate_factor = rotate_factor | |
| self.rotate_prob = rotate_prob | |
| self.shear_factor = shear_factor | |
| self.shear_prob = shear_prob | |
| self.use_udp = use_udp | |
| self.distribution = distribution | |
| self.clip_border = clip_border | |
| self.bbox_keep_corner = bbox_keep_corner | |
| self.transform_mode = transform_mode | |
| if isinstance(pad_val, (int, float)): | |
| pad_val = (pad_val, pad_val, pad_val) | |
| if 'affine' in transform_mode: | |
| self._transform = partial( | |
| cv2.warpAffine, flags=cv2.INTER_LINEAR, borderValue=pad_val) | |
| else: | |
| self._transform = partial(cv2.warpPerspective, borderValue=pad_val) | |

    def _random(self,
                low: float = -1.,
                high: float = 1.,
                size: tuple = ()) -> np.ndarray:
        if self.distribution == 'trunc_norm':
            # Sample from a truncated normal distribution
            return truncnorm.rvs(low, high, size=size).astype(np.float32)
        elif self.distribution == 'uniform':
            x = np.random.rand(*size)
            return x * (high - low) + low
        else:
            raise ValueError(f'the argument `distribution` should be either '
                             f'\'trunc_norm\' or \'uniform\', but got '
                             f'{self.distribution}.')

    def _fix_aspect_ratio(self, scale: np.ndarray, aspect_ratio: float):
        """Extend the scale to match the given aspect ratio.

        Args:
            scale (np.ndarray): The image scale (w, h) in shape (2, )
            aspect_ratio (float): The ratio of ``w/h``

        Returns:
            np.ndarray: The reshaped image scale in (2, )
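
        Example (illustrative values)::

            >>> import numpy as np
            >>> transform = BottomupRandomAffine(scale_type='short')
            >>> transform._fix_aspect_ratio(
            ...     np.array([100., 100.]), aspect_ratio=0.75).tolist()
            [75.0, 100.0]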
| """ | |
| w, h = scale | |
| if w > h * aspect_ratio: | |
| if self.scale_type == 'long': | |
| _w, _h = w, w / aspect_ratio | |
| elif self.scale_type == 'short': | |
| _w, _h = h * aspect_ratio, h | |
| else: | |
| raise ValueError(f'Unknown scale type: {self.scale_type}') | |
| else: | |
| if self.scale_type == 'short': | |
| _w, _h = w, w / aspect_ratio | |
| elif self.scale_type == 'long': | |
| _w, _h = h * aspect_ratio, h | |
| else: | |
| raise ValueError(f'Unknown scale type: {self.scale_type}') | |
| return np.array([_w, _h], dtype=scale.dtype) | |

    @cache_randomness
    def _get_transform_params(self) -> Tuple:
        """Get random transform parameters.

        Returns:
            tuple:
            - offset (np.ndarray): Image offset rate in shape (2, )
            - scale (np.ndarray): Image scaling rate factor in shape (1, )
            - rotate (np.ndarray): Image rotation degree in shape (1, )
            - shear (np.ndarray): Image shear degree in shape (2, )
        """
        # get offset
        if np.random.rand() < self.shift_prob:
            offset = self._random(size=(2, )) * self.shift_factor
        else:
            offset = np.zeros((2, ), dtype=np.float32)

        # get scale
        if np.random.rand() < self.scale_prob:
            scale_min, scale_max = self.scale_factor
            scale = scale_min + (scale_max - scale_min) * (
                self._random(size=(1, )) + 1) / 2
        else:
            scale = np.ones(1, dtype=np.float32)

        # get rotation
        if np.random.rand() < self.rotate_prob:
            rotate = self._random() * self.rotate_factor
        else:
            rotate = 0

        # get shear
        if 'perspective' in self.transform_mode and np.random.rand(
        ) < self.shear_prob:
            shear = self._random(size=(2, )) * self.shear_factor
        else:
            shear = np.zeros((2, ), dtype=np.float32)

        return offset, scale, rotate, shear

    def transform(self, results: Dict) -> Optional[dict]:
        """The transform function of :class:`BottomupRandomAffine` to apply
        a random affine (or perspective) transform to the image, keypoints
        and bboxes.

        See ``transform()`` method of :class:`BaseTransform` for details.

        Args:
            results (dict): Result dict from the data pipeline.

        Returns:
            dict: Result dict with the image and annotations transformed.
        """
        img_h, img_w = results['img_shape'][:2]
        w, h = self.input_size

        offset_rate, scale_rate, rotate, shear = self._get_transform_params()

        if 'affine' in self.transform_mode:
            offset = offset_rate * [img_w, img_h]
            scale = scale_rate * [img_w, img_h]
            # adjust the scale to match the target aspect ratio
            scale = self._fix_aspect_ratio(scale, aspect_ratio=w / h)

            if self.transform_mode == 'affine_udp':
                center = np.array([(img_w - 1.0) / 2, (img_h - 1.0) / 2],
                                  dtype=np.float32)
                warp_mat = get_udp_warp_matrix(
                    center=center + offset,
                    scale=scale,
                    rot=rotate,
                    output_size=(w, h))
            else:
                center = np.array([img_w / 2, img_h / 2], dtype=np.float32)
                warp_mat = get_warp_matrix(
                    center=center + offset,
                    scale=scale,
                    rot=rotate,
                    output_size=(w, h))
        else:
            offset = offset_rate * [w, h]
            center = np.array([w / 2, h / 2], dtype=np.float32)
            warp_mat = get_pers_warp_matrix(
                center=center,
                translate=offset,
                scale=scale_rate[0],
                rot=rotate,
                shear=shear)

        # warp image and keypoints
        results['img'] = self._transform(results['img'], warp_mat,
                                         (int(w), int(h)))

        if 'keypoints' in results:
            # Only transform (x, y) coordinates
            kpts = cv2.transform(results['keypoints'], warp_mat)
            if kpts.shape[-1] == 3:
                kpts = kpts[..., :2] / kpts[..., 2:3]
            results['keypoints'] = kpts

            if self.clip_border:
                results['keypoints'], results[
                    'keypoints_visible'] = keypoint_clip_border(
                        results['keypoints'], results['keypoints_visible'],
                        (w, h))

        if 'bbox' in results:
            bbox = bbox_xyxy2corner(results['bbox'])
            bbox = cv2.transform(bbox, warp_mat)
            if bbox.shape[-1] == 3:
                bbox = bbox[..., :2] / bbox[..., 2:3]
            if not self.bbox_keep_corner:
                bbox = bbox_corner2xyxy(bbox)
            if self.clip_border:
                bbox = bbox_clip_border(bbox, (w, h))
            results['bbox'] = bbox

        if 'area' in results:
            warp_mat_for_area = warp_mat
            if warp_mat.shape[0] == 2:
                aux_row = np.array([[0.0, 0.0, 1.0]], dtype=warp_mat.dtype)
                warp_mat_for_area = np.concatenate((warp_mat, aux_row))
            results['area'] *= np.linalg.det(warp_mat_for_area)

        results['input_size'] = self.input_size
        results['warp_mat'] = warp_mat

        return results


@TRANSFORMS.register_module()
class BottomupResize(BaseTransform):
    """Resize the image to the input size of the model. Optionally, the image
    can be resized to multiple sizes to build an image pyramid for
    multi-scale inference.

    Required Keys:

        - img
        - ori_shape

    Modified Keys:

        - img
        - img_shape

    Added Keys:

        - input_size
        - warp_mat
        - aug_scales

    Args:
        input_size (Tuple[int, int]): The input size of the model in [w, h].
            Note that the actual size of the resized image will be affected
            by ``resize_mode`` and ``size_factor``, thus may not exactly
            equal the ``input_size``
        aug_scales (List[float], optional): The extra input scales for
            multi-scale testing. If given, the input image will be resized
            to different scales to build an image pyramid, and heatmaps from
            all scales will be aggregated to make the final prediction.
            Defaults to ``None``
        size_factor (int): The actual input size will be ceiled to
            a multiple of the ``size_factor`` value at both sides.
            Defaults to 32
        resize_mode (str): The method to resize the image to the input size.
            Options are:

            - ``'fit'``: The image will be resized according to the
                relatively longer side with the aspect ratio kept. The
                resized image will entirely fit into the range of the
                input size
            - ``'expand'``: The image will be resized according to the
                relatively shorter side with the aspect ratio kept. The
                resized image will exceed the given input size at the
                longer side
        pad_val (tuple): The padded pixel value of the image border.
            Defaults to ``(0, 0, 0)``
        use_udp (bool): Whether use unbiased data processing. See
            `UDP (CVPR 2020)`_ for details. Defaults to ``False``

    .. _`UDP (CVPR 2020)`: https://arxiv.org/abs/1911.07524
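
    Example (a minimal usage sketch; the image size is illustrative)::

        >>> import numpy as np
        >>> transform = BottomupResize(input_size=(640, 640))
        >>> results = dict(
        ...     img=np.zeros((427, 640, 3), dtype=np.uint8),
        ...     ori_shape=(427, 640))
        >>> results = transform.transform(results)
        >>> results['img'].shape  # 'fit' mode keeps the aspect ratio, pads
        (640, 640, 3)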
| """ | |
| def __init__(self, | |
| input_size: Tuple[int, int], | |
| aug_scales: Optional[List[float]] = None, | |
| size_factor: int = 32, | |
| resize_mode: str = 'fit', | |
| pad_val: tuple = (0, 0, 0), | |
| use_udp: bool = False): | |
| super().__init__() | |
| self.input_size = input_size | |
| self.aug_scales = aug_scales | |
| self.resize_mode = resize_mode | |
| self.size_factor = size_factor | |
| self.use_udp = use_udp | |
| self.pad_val = pad_val | |

    @staticmethod
    def _ceil_to_multiple(size: Tuple[int, int], base: int):
        """Ceil the given size (tuple of [w, h]) to a multiple of the
        base."""
        return tuple(int(np.ceil(s / base) * base) for s in size)

    def _get_input_size(self, img_size: Tuple[int, int],
                        input_size: Tuple[int, int]) -> Tuple:
        """Calculate the actual input size (which the original image will be
        resized to) and the padded input size (which the resized image will
        be padded to, or which is the size of the model input).

        Args:
            img_size (Tuple[int, int]): The original image size in [w, h]
            input_size (Tuple[int, int]): The expected input size in [w, h]

        Returns:
            tuple:
            - actual_input_size (Tuple[int, int]): The target size to resize
                the image
            - padded_input_size (Tuple[int, int]): The target size to
                generate the model input which will contain the resized
                image
        """
        img_w, img_h = img_size
        ratio = img_w / img_h

        if self.resize_mode == 'fit':
            padded_input_size = self._ceil_to_multiple(input_size,
                                                       self.size_factor)
            if padded_input_size != input_size:
                raise ValueError(
                    'When ``resize_mode==\'fit\', the input size (height and'
                    ' width) should be multiples of the size_factor('
                    f'{self.size_factor}) at all scales. Got invalid input '
                    f'size {input_size}.')

            pad_w, pad_h = padded_input_size
            rsz_w = min(pad_w, pad_h * ratio)
            rsz_h = min(pad_h, pad_w / ratio)
            actual_input_size = (rsz_w, rsz_h)
        elif self.resize_mode == 'expand':
            _padded_input_size = self._ceil_to_multiple(
                input_size, self.size_factor)
            pad_w, pad_h = _padded_input_size
            rsz_w = max(pad_w, pad_h * ratio)
            rsz_h = max(pad_h, pad_w / ratio)

            actual_input_size = (rsz_w, rsz_h)
            padded_input_size = self._ceil_to_multiple(actual_input_size,
                                                       self.size_factor)
        else:
            raise ValueError(f'Invalid resize mode {self.resize_mode}')

        return actual_input_size, padded_input_size

    def transform(self, results: Dict) -> Optional[dict]:
        """The transform function of :class:`BottomupResize` to resize the
        image to the model input size, optionally at multiple scales.

        See ``transform()`` method of :class:`BaseTransform` for details.

        Args:
            results (dict): Result dict from the data pipeline.

        Returns:
            dict: Result dict with the image(s) resized.
        """
        img = results['img']
        img_h, img_w = results['ori_shape']
        w, h = self.input_size

        input_sizes = [(w, h)]
        if self.aug_scales:
            input_sizes += [(int(w * s), int(h * s))
                            for s in self.aug_scales]

        imgs = []
        for i, (_w, _h) in enumerate(input_sizes):

            actual_input_size, padded_input_size = self._get_input_size(
                img_size=(img_w, img_h), input_size=(_w, _h))

            if self.use_udp:
                center = np.array([(img_w - 1.0) / 2, (img_h - 1.0) / 2],
                                  dtype=np.float32)
                scale = np.array([img_w, img_h], dtype=np.float32)
                warp_mat = get_udp_warp_matrix(
                    center=center,
                    scale=scale,
                    rot=0,
                    output_size=actual_input_size)
            else:
                center = np.array([img_w / 2, img_h / 2], dtype=np.float32)
                scale = np.array([
                    img_w * padded_input_size[0] / actual_input_size[0],
                    img_h * padded_input_size[1] / actual_input_size[1]
                ],
                                 dtype=np.float32)
                warp_mat = get_warp_matrix(
                    center=center,
                    scale=scale,
                    rot=0,
                    output_size=padded_input_size)

            _img = cv2.warpAffine(
                img,
                warp_mat,
                padded_input_size,
                flags=cv2.INTER_LINEAR,
                borderValue=self.pad_val)

            imgs.append(_img)

            # Store the transform information w.r.t. the main input size
            if i == 0:
                results['img_shape'] = padded_input_size[::-1]
                results['input_center'] = center
                results['input_scale'] = scale
                results['input_size'] = padded_input_size

        if self.aug_scales:
            results['img'] = imgs
            results['aug_scales'] = self.aug_scales
        else:
            results['img'] = imgs[0]
            results['aug_scales'] = None

        return results


@TRANSFORMS.register_module()
class BottomupRandomCrop(BaseTransform):
    """Random crop the image & bboxes & masks.

    The absolute ``crop_size`` is sampled based on ``crop_type`` and
    ``image_size``, then the cropped results are generated.

    Required Keys:

        - img
        - keypoints
        - bbox (optional)
        - masks (BitmapMasks | PolygonMasks) (optional)

    Modified Keys:

        - img
        - img_shape
        - keypoints
        - keypoints_visible
        - num_keypoints
        - bbox (optional)
        - bbox_score (optional)
        - id (optional)
        - category_id (optional)
        - raw_ann_info (optional)
        - iscrowd (optional)
        - segmentation (optional)
        - masks (optional)

    Added Keys:

        - warp_mat

    Args:
        crop_size (tuple): The relative ratio or absolute pixels of
            (width, height).
        crop_type (str, optional): One of "relative_range", "relative",
            "absolute", "absolute_range". "relative" randomly crops a
            (h * crop_size[1], w * crop_size[0]) part from an input of size
            (h, w). "relative_range" uniformly samples the relative crop
            size from range [crop_size[0], 1] and [crop_size[1], 1] for
            width and height respectively. "absolute" crops from an input
            with absolute size (crop_size[0], crop_size[1]).
            "absolute_range" uniformly samples crop_h in range
            [crop_size[0], min(h, crop_size[1])] and crop_w in range
            [crop_size[0], min(w, crop_size[1])].
            Defaults to "absolute".
        allow_negative_crop (bool, optional): Whether to allow a crop that
            does not contain any bbox area. Defaults to False.
        recompute_bbox (bool, optional): Whether to re-compute the boxes
            based on cropped instance masks. Defaults to False.
        bbox_clip_border (bool, optional): Whether to clip the objects
            outside the border of the image. Defaults to True.

    Note:
        - If the image is smaller than the absolute crop size, the original
          image is returned.
        - If the crop does not contain any gt-bbox region and
          ``allow_negative_crop`` is set to False, this image is skipped.
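
    Example (a minimal usage sketch; the shapes and bbox are illustrative)::

        >>> import numpy as np
        >>> transform = BottomupRandomCrop(
        ...     crop_size=(100, 100), allow_negative_crop=True)
        >>> results = dict(
        ...     img=np.zeros((240, 320, 3), dtype=np.uint8),
        ...     bbox=np.array([[10., 10., 200., 200.]]),
        ...     keypoints=np.random.rand(1, 17, 2) * 100,
        ...     keypoints_visible=np.ones((1, 17), dtype=np.float32))
        >>> results = transform.transform(results)
        >>> results['img'].shape
        (100, 100, 3)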
| """ | |
| def __init__(self, | |
| crop_size: tuple, | |
| crop_type: str = 'absolute', | |
| allow_negative_crop: bool = False, | |
| recompute_bbox: bool = False, | |
| bbox_clip_border: bool = True) -> None: | |
| if crop_type not in [ | |
| 'relative_range', 'relative', 'absolute', 'absolute_range' | |
| ]: | |
| raise ValueError(f'Invalid crop_type {crop_type}.') | |
| if crop_type in ['absolute', 'absolute_range']: | |
| assert crop_size[0] > 0 and crop_size[1] > 0 | |
| assert isinstance(crop_size[0], int) and isinstance( | |
| crop_size[1], int) | |
| if crop_type == 'absolute_range': | |
| assert crop_size[0] <= crop_size[1] | |
| else: | |
| assert 0 < crop_size[0] <= 1 and 0 < crop_size[1] <= 1 | |
| self.crop_size = crop_size | |
| self.crop_type = crop_type | |
| self.allow_negative_crop = allow_negative_crop | |
| self.bbox_clip_border = bbox_clip_border | |
| self.recompute_bbox = recompute_bbox | |

    def _crop_data(self, results: dict, crop_size: Tuple[int, int],
                   allow_negative_crop: bool) -> Union[dict, None]:
        """Function to randomly crop images, bounding boxes, masks, semantic
        segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.
            crop_size (Tuple[int, int]): Expected absolute size after
                cropping, (h, w).
            allow_negative_crop (bool): Whether to allow a crop that does
                not contain any bbox area.

        Returns:
            results (Union[dict, None]): Randomly cropped results, the
            'img_shape' key in the result dict is updated according to the
            crop size. None will be returned when there is no valid bbox
            after cropping.
        """
        assert crop_size[0] > 0 and crop_size[1] > 0
        img = results['img']
        margin_h = max(img.shape[0] - crop_size[0], 0)
        margin_w = max(img.shape[1] - crop_size[1], 0)
        offset_h, offset_w = self._rand_offset((margin_h, margin_w))
        crop_y1, crop_y2 = offset_h, offset_h + crop_size[0]
        crop_x1, crop_x2 = offset_w, offset_w + crop_size[1]

        # Record the warp matrix of the RandomCrop
        warp_mat = np.array([[1, 0, -offset_w], [0, 1, -offset_h],
                             [0, 0, 1]],
                            dtype=np.float32)
        if results.get('warp_mat', None) is None:
            results['warp_mat'] = warp_mat
        else:
            results['warp_mat'] = warp_mat @ results['warp_mat']

        # crop the image
        img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
        img_shape = img.shape
        results['img'] = img
        results['img_shape'] = img_shape[:2]

        # crop bboxes accordingly and clip to the image boundary
        if results.get('bbox', None) is not None:
            distances = (-offset_w, -offset_h)
            bboxes = results['bbox']
            bboxes = bboxes + np.tile(np.asarray(distances), 2)

            if self.bbox_clip_border:
                bboxes[..., 0::2] = bboxes[..., 0::2].clip(0, img_shape[1])
                bboxes[..., 1::2] = bboxes[..., 1::2].clip(0, img_shape[0])

            valid_inds = (bboxes[..., 0] < img_shape[1]) & \
                (bboxes[..., 1] < img_shape[0]) & \
                (bboxes[..., 2] > 0) & \
                (bboxes[..., 3] > 0)

            # If the crop does not contain any gt-bbox area and
            # allow_negative_crop is False, skip this image.
            if (not valid_inds.any() and not allow_negative_crop):
                return None

            results['bbox'] = bboxes[valid_inds]

            meta_keys = [
                'bbox_score', 'id', 'category_id', 'raw_ann_info', 'iscrowd'
            ]
            for key in meta_keys:
                if results.get(key):
                    if isinstance(results[key], list):
                        results[key] = np.asarray(
                            results[key])[valid_inds].tolist()
                    else:
                        results[key] = results[key][valid_inds]

            if results.get('keypoints', None) is not None:
                keypoints = results['keypoints']
                distances = np.asarray(distances).reshape(1, 1, 2)
                keypoints = keypoints + distances
                if self.bbox_clip_border:
                    keypoints_outside_x = keypoints[:, :, 0] < 0
                    keypoints_outside_y = keypoints[:, :, 1] < 0
                    keypoints_outside_width = \
                        keypoints[:, :, 0] > img_shape[1]
                    keypoints_outside_height = \
                        keypoints[:, :, 1] > img_shape[0]

                    kpt_outside = np.logical_or.reduce(
                        (keypoints_outside_x, keypoints_outside_y,
                         keypoints_outside_width, keypoints_outside_height))

                    results['keypoints_visible'][kpt_outside] *= 0
                    keypoints[:, :, 0] = keypoints[:, :, 0].clip(
                        0, img_shape[1])
                    keypoints[:, :, 1] = keypoints[:, :, 1].clip(
                        0, img_shape[0])
                results['keypoints'] = keypoints[valid_inds]
                results['keypoints_visible'] = results['keypoints_visible'][
                    valid_inds]

        if results.get('segmentation', None) is not None:
            results['segmentation'] = results['segmentation'][
                crop_y1:crop_y2, crop_x1:crop_x2]

        if results.get('masks', None) is not None:
            results['masks'] = results['masks'][valid_inds.nonzero(
            )[0]].crop(np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
            if self.recompute_bbox:
                results['bbox'] = results['masks'].get_bboxes(
                    type(results['bbox']))

        return results

    @cache_randomness
    def _rand_offset(self, margin: Tuple[int, int]) -> Tuple[int, int]:
        """Randomly generate crop offset.

        Args:
            margin (Tuple[int, int]): The upper bound for the offset
                generated randomly.

        Returns:
            Tuple[int, int]: The random offset for the crop.
        """
        margin_h, margin_w = margin
        offset_h = np.random.randint(0, margin_h + 1)
        offset_w = np.random.randint(0, margin_w + 1)

        return offset_h, offset_w

    @cache_randomness
    def _get_crop_size(self, image_size: Tuple[int, int]) -> Tuple[int, int]:
        """Randomly generates the absolute crop size based on `crop_type`
        and `image_size`.

        Args:
            image_size (Tuple[int, int]): (h, w).

        Returns:
            crop_size (Tuple[int, int]): (crop_h, crop_w) in absolute
            pixels.
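
        Example (illustrative, with ``crop_type='absolute'``)::

            >>> transform = BottomupRandomCrop(crop_size=(100, 80))
            >>> transform._get_crop_size((240, 320))
            (80, 100)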
| """ | |
| h, w = image_size | |
| if self.crop_type == 'absolute': | |
| return min(self.crop_size[1], h), min(self.crop_size[0], w) | |
| elif self.crop_type == 'absolute_range': | |
| crop_h = np.random.randint( | |
| min(h, self.crop_size[0]), | |
| min(h, self.crop_size[1]) + 1) | |
| crop_w = np.random.randint( | |
| min(w, self.crop_size[0]), | |
| min(w, self.crop_size[1]) + 1) | |
| return crop_h, crop_w | |
| elif self.crop_type == 'relative': | |
| crop_w, crop_h = self.crop_size | |
| return int(h * crop_h + 0.5), int(w * crop_w + 0.5) | |
| else: | |
| # 'relative_range' | |
| crop_size = np.asarray(self.crop_size, dtype=np.float32) | |
| crop_h, crop_w = crop_size + np.random.rand(2) * (1 - crop_size) | |
| return int(h * crop_h + 0.5), int(w * crop_w + 0.5) | |

    def transform(self, results: dict) -> Union[dict, None]:
        """Transform function to randomly crop images, bounding boxes,
        masks, and semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            results (Union[dict, None]): Randomly cropped results, the
            'img_shape' key in the result dict is updated according to the
            crop size. None will be returned when there is no valid bbox
            after cropping.
        """
        image_size = results['img'].shape[:2]
        crop_size = self._get_crop_size(image_size)
        results = self._crop_data(results, crop_size,
                                  self.allow_negative_crop)
        return results


@TRANSFORMS.register_module()
class BottomupRandomChoiceResize(BaseTransform):
    """Resize images & bbox & mask from a list of multiple scales.

    This transform resizes the input image to some scale. Bboxes and masks
    are then resized with the same scale factor. The resize scale will be
    randomly selected from ``scales``.

    How to choose the target scale to resize the image will follow the rules
    below:

    - if ``scale`` is a list of tuple, the target scale is sampled from the
      list uniformly.
    - if ``scale`` is a tuple, the target scale will be set to the tuple.

    Required Keys:

        - img
        - bbox
        - keypoints

    Modified Keys:

        - img
        - img_shape
        - bbox
        - keypoints

    Added Keys:

        - scale
        - scale_factor
        - scale_idx

    Args:
        scales (Union[list, Tuple]): Image scales for resizing.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing
            the image. Defaults to False.
        clip_object_border (bool): Whether to clip the objects outside the
            border of the image. Defaults to True.
        backend (str): The image resize backend type used by mmcv, e.g.
            'cv2'. Defaults to 'cv2'.
        **resize_kwargs: Other keyword arguments for the ``resize_type``.
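
    Example (a minimal usage sketch with illustrative scales)::

        >>> import numpy as np
        >>> transform = BottomupRandomChoiceResize(
        ...     scales=[(256, 256), (384, 384)], keep_ratio=True)
        >>> results = dict(
        ...     img=np.zeros((240, 320, 3), dtype=np.uint8),
        ...     ori_shape=(240, 320))
        >>> results = transform.transform(results)
        >>> results['img_shape'] in [(192, 256), (288, 384)]
        True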
| """ | |
| def __init__( | |
| self, | |
| scales: Sequence[Union[int, Tuple]], | |
| keep_ratio: bool = False, | |
| clip_object_border: bool = True, | |
| backend: str = 'cv2', | |
| **resize_kwargs, | |
| ) -> None: | |
| super().__init__() | |
| if isinstance(scales, list): | |
| self.scales = scales | |
| else: | |
| self.scales = [scales] | |
| self.keep_ratio = keep_ratio | |
| self.clip_object_border = clip_object_border | |
| self.backend = backend | |

    @cache_randomness
    def _random_select(self) -> Tuple[int, int]:
        """Randomly select a scale from the given candidates.

        Returns:
            (tuple, int): Returns a tuple ``(scale, scale_idx)``, where
            ``scale`` is the selected image scale and ``scale_idx`` is the
            selected index in the given candidates.
        """
        scale_idx = np.random.randint(len(self.scales))
        scale = self.scales[scale_idx]
        return scale, scale_idx

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``self.scale``."""

        if self.keep_ratio:
            img, scale_factor = imrescale(
                results['img'],
                self.scale,
                interpolation='bilinear',
                return_scale=True,
                backend=self.backend)
            # the w_scale and h_scale have a minor difference;
            # a real fix should be done in mmcv.imrescale in the future
            new_h, new_w = img.shape[:2]
            h, w = results['img'].shape[:2]
            w_scale = new_w / w
            h_scale = new_h / h
        else:
            img, w_scale, h_scale = imresize(
                results['img'],
                self.scale,
                interpolation='bilinear',
                return_scale=True,
                backend=self.backend)

        results['img'] = img
        results['img_shape'] = img.shape[:2]
        results['scale_factor'] = (w_scale, h_scale)
        # ``input_size`` follows the (w, h) convention used across this
        # module, while ``img.shape`` is (h, w)
        results['input_size'] = img.shape[:2][::-1]
        # ``ori_shape`` is (h, w)
        h, w = results['ori_shape']
        center = np.array([w / 2, h / 2], dtype=np.float32)
        scale = np.array([w, h], dtype=np.float32)
        results['input_center'] = center
        results['input_scale'] = scale

    def _resize_bboxes(self, results: dict) -> None:
        """Resize bounding boxes with ``self.scale``."""
        if results.get('bbox', None) is not None:
            bboxes = results['bbox'] * np.tile(
                np.array(results['scale_factor']), 2)
            if self.clip_object_border:
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0,
                                          results['img_shape'][1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0,
                                          results['img_shape'][0])
            results['bbox'] = bboxes

    def _resize_keypoints(self, results: dict) -> None:
        """Resize keypoints with ``self.scale``."""
        if results.get('keypoints', None) is not None:
            keypoints = results['keypoints']

            keypoints[:, :, :2] = keypoints[:, :, :2] * np.array(
                results['scale_factor'])
            if self.clip_object_border:
                keypoints[:, :, 0] = np.clip(keypoints[:, :, 0], 0,
                                             results['img_shape'][1])
                keypoints[:, :, 1] = np.clip(keypoints[:, :, 1], 0,
                                             results['img_shape'][0])
            results['keypoints'] = keypoints

    def transform(self, results: dict) -> dict:
        """Apply resize transforms on results from a list of scales.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: Resized results, 'img', 'bbox', 'keypoints', 'scale',
            'scale_factor', 'img_shape', and 'keep_ratio' keys are updated
            in the result dict.
        """
        target_scale, scale_idx = self._random_select()

        self.scale = target_scale
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_keypoints(results)

        results['scale_idx'] = scale_idx
        return results