|
|
|
|
|
import random
|
|
|
import warnings
|
|
|
from numbers import Number
|
|
|
from typing import Sequence
|
|
|
|
|
|
import cv2
|
|
|
import mmcv
|
|
|
import mmengine
|
|
|
import numpy as np
|
|
|
from mmcv.transforms import BaseTransform
|
|
|
from mmcv.transforms.utils import cache_randomness
|
|
|
from torch.nn.modules.utils import _pair
|
|
|
|
|
|
from mmaction.registry import TRANSFORMS
|
|
|
|
|
|
|
|
|
def _combine_quadruple(a, b):
    """Compose two 4-element quadruples, treating ``b`` as relative to ``a``.

    Elements 0/1 act as offsets and elements 2/3 as sizes (the same layout
    as the ``crop_quadruple`` entries built in this file): the offset of
    ``b`` is scaled by the size of ``a`` and shifted by the offset of ``a``,
    while the sizes are multiplied.

    Args:
        a (Sequence): The outer quadruple.
        b (Sequence): The inner quadruple, expressed relative to ``a``.

    Returns:
        tuple: The combined quadruple.
    """
    offset_0 = a[0] + a[2] * b[0]
    offset_1 = a[1] + a[3] * b[1]
    size_0 = a[2] * b[2]
    size_1 = a[3] * b[3]
    return offset_0, offset_1, size_0, size_1
|
|
|
|
|
|
|
|
|
def _flip_quadruple(a):
    """Mirror a quadruple along its first axis inside the unit interval.

    Only element 0 changes: it becomes ``1 - a[0] - a[2]``; the remaining
    three elements are passed through untouched.

    Args:
        a (Sequence): A 4-element quadruple.

    Returns:
        tuple: The mirrored quadruple.
    """
    mirrored = 1 - a[0] - a[2]
    return mirrored, a[1], a[2], a[3]
|
|
|
|
|
|
|
|
|
def _init_lazy_if_proper(results, lazy):
    """Prepare ``results`` for a (possibly lazy) transform.

    Guarantees that "img_shape" is present and, in lazy mode, that the
    bookkeeping dict "lazy" exists. In non-lazy mode it verifies that no
    lazy operation is still pending, so that eager and lazy transforms are
    not accidentally mixed.

    Required keys in results are "imgs" if "img_shape" not in results,
    otherwise "img_shape". Added or modified keys are "img_shape" and
    "lazy". Keys created in "lazy" are "original_shape", "crop_bbox",
    "flip", "flip_direction" and "interpolation".

    Args:
        results (dict): A dict stores data pipeline result.
        lazy (bool): Whether the calling transform works lazily.

    Raises:
        AssertionError: If ``lazy`` is falsy while "lazy" is still present
            in ``results``.
    """
    if 'img_shape' not in results:
        first_img = results['imgs'][0]
        results['img_shape'] = first_img.shape[:2]

    if not lazy:
        # An eager transform must not run while lazy ops are still pending.
        assert 'lazy' not in results, 'Use Fuse after lazy operations'
        return

    if 'lazy' in results:
        # Bookkeeping was already set up by an earlier lazy transform.
        return

    img_h, img_w = results['img_shape']
    results['lazy'] = dict(
        original_shape=results['img_shape'],
        # Start from a crop covering the whole image: (x1, y1, x2, y2).
        crop_bbox=np.array([0, 0, img_w, img_h], dtype=np.float32),
        flip=False,
        flip_direction=None,
        interpolation=None)
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class Fuse(BaseTransform):
    """Execute the accumulated lazy operations on the images.

    The recorded operations are applied in a fixed order:
        crop -> resize -> flip

    Required keys are "imgs", "img_shape" and "lazy", added or modified keys
    are "imgs", "lazy".
    Required keys in "lazy" are "crop_bbox", "interpolation", "flip" and
    "flip_direction" (the latter is only read when "flip" is truthy).
    """

    def transform(self, results):
        """Materialize the pending crop / resize / flip and drop "lazy".

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "imgs" replaced and "lazy" removed.

        Raises:
            ValueError: If ``results`` has no "lazy" entry.
        """
        if 'lazy' not in results:
            raise ValueError('No lazy operation detected')
        pending = results['lazy']
        frames = results['imgs']

        # Step 1: crop with the accumulated bbox, rounded to whole pixels.
        x1, y1, x2, y2 = pending['crop_bbox'].round().astype(int)
        cropped = [frame[y1:y2, x1:x2] for frame in frames]

        # Step 2: resize to the final shape tracked in "img_shape".
        out_h, out_w = results['img_shape']
        mode = pending['interpolation']
        if mode is None:
            # No lazy Resize recorded an interpolation; use the default.
            mode = 'bilinear'
        resized = [
            mmcv.imresize(patch, (out_w, out_h), interpolation=mode)
            for patch in cropped
        ]

        # Step 3: flip (in place) when a lazy Flip asked for it.
        if pending['flip']:
            for frame in resized:
                mmcv.imflip_(frame, pending['flip_direction'])

        results['imgs'] = resized
        del results['lazy']

        return results
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomCrop(BaseTransform):
    """Vanilla square random crop that specifies the output size.

    Required keys in results are "img_shape", "keypoint" (optional), "imgs"
    (optional), added or modified keys are "keypoint", "imgs", "lazy",
    "crop_bbox", "crop_quadruple", "img_shape", "gt_bboxes" (optional) and
    "proposals" (optional); Required keys in "lazy" are "flip", "crop_bbox",
    added or modified key is "crop_bbox".

    Args:
        size (int): The output size of the images.
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self, size, lazy=False):
        if not isinstance(size, int):
            raise TypeError(f'Size must be an int, but got {type(size)}')
        self.size = size
        self.lazy = lazy

    @staticmethod
    def _crop_kps(kps, crop_bbox):
        """Shift keypoints so they are relative to the crop origin.

        Args:
            kps (np.ndarray): Keypoints whose last axis starts with (x, y).
            crop_bbox (np.ndarray): The crop box as (x1, y1, x2, y2).
        """
        return kps - crop_bbox[:2]

    @staticmethod
    def _crop_imgs(imgs, crop_bbox):
        """Slice every image with the crop box (x1, y1, x2, y2)."""
        x1, y1, x2, y2 = crop_bbox
        return [img[y1:y2, x1:x2] for img in imgs]

    @staticmethod
    def _box_crop(box, crop_bbox):
        """Crop the bounding boxes according to the crop_bbox.

        Coordinates are translated into the cropped frame and clipped to
        ``[0, crop_width - 1]`` / ``[0, crop_height - 1]``.

        Args:
            box (np.ndarray): The bounding boxes.
            crop_bbox(np.ndarray): The bbox used to crop the original image.

        Returns:
            np.ndarray: A new array; ``box`` itself is left untouched.
        """
        x1, y1, x2, y2 = crop_bbox
        img_w, img_h = x2 - x1, y2 - y1

        box_ = box.copy()
        # Even columns hold x coordinates, odd columns hold y coordinates.
        box_[..., 0::2] = np.clip(box[..., 0::2] - x1, 0, img_w - 1)
        box_[..., 1::2] = np.clip(box[..., 1::2] - y1, 0, img_h - 1)
        return box_

    def _all_box_crop(self, results, crop_bbox):
        """Crop the gt_bboxes and proposals in results according to crop_bbox.

        Args:
            results (dict): All information about the sample, which contain
                'gt_bboxes' and 'proposals' (optional).
            crop_bbox(np.ndarray): The bbox used to crop the original image.

        Returns:
            dict: The same ``results`` object with the boxes updated.
        """
        results['gt_bboxes'] = self._box_crop(results['gt_bboxes'], crop_bbox)
        if 'proposals' in results and results['proposals'] is not None:
            assert results['proposals'].shape[1] == 4
            results['proposals'] = self._box_crop(results['proposals'],
                                                  crop_bbox)
        return results

    def transform(self, results):
        """Performs the RandomCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The updated ``results``.

        Raises:
            AssertionError: If the crop size exceeds the image, or if
                keypoints / boxes are combined with lazy mode.
            NotImplementedError: If a lazy flip was already recorded.
        """
        _init_lazy_if_proper(results, self.lazy)
        if 'keypoint' in results:
            assert not self.lazy, ('Keypoint Augmentations are not compatible '
                                   'with lazy == True')

        img_h, img_w = results['img_shape']
        assert self.size <= img_h and self.size <= img_w

        y_offset = 0
        x_offset = 0
        # ``high`` of np.random.randint is exclusive, so add 1 to make the
        # largest valid offset (crop touching the bottom / right border)
        # reachable as well.
        if img_h > self.size:
            y_offset = int(np.random.randint(0, img_h - self.size + 1))
        if img_w > self.size:
            x_offset = int(np.random.randint(0, img_w - self.size + 1))

        if 'crop_quadruple' not in results:
            results['crop_quadruple'] = np.array(
                [0, 0, 1, 1],  # x, y, w, h
                dtype=np.float32)

        # This crop expressed as ratios of the current image ...
        x_ratio, y_ratio = x_offset / img_w, y_offset / img_h
        w_ratio, h_ratio = self.size / img_w, self.size / img_h

        # ... composed with whatever crop has been accumulated so far.
        old_crop_quadruple = results['crop_quadruple']
        old_x_ratio, old_y_ratio = old_crop_quadruple[0], old_crop_quadruple[1]
        old_w_ratio, old_h_ratio = old_crop_quadruple[2], old_crop_quadruple[3]
        new_crop_quadruple = [
            old_x_ratio + x_ratio * old_w_ratio,
            old_y_ratio + y_ratio * old_h_ratio, w_ratio * old_w_ratio,
            h_ratio * old_h_ratio
        ]
        results['crop_quadruple'] = np.array(
            new_crop_quadruple, dtype=np.float32)

        new_h, new_w = self.size, self.size

        crop_bbox = np.array(
            [x_offset, y_offset, x_offset + new_w, y_offset + new_h])
        results['crop_bbox'] = crop_bbox

        results['img_shape'] = (new_h, new_w)

        if not self.lazy:
            if 'keypoint' in results:
                results['keypoint'] = self._crop_kps(results['keypoint'],
                                                     crop_bbox)
            if 'imgs' in results:
                results['imgs'] = self._crop_imgs(results['imgs'], crop_bbox)
        else:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Map the crop (given in current-image pixels) back into the
            # coordinate frame of the lazily accumulated crop box.
            lazy_left, lazy_top, lazy_right, lazy_bottom = lazyop['crop_bbox']
            left = x_offset * (lazy_right - lazy_left) / img_w
            right = (x_offset + new_w) * (lazy_right - lazy_left) / img_w
            top = y_offset * (lazy_bottom - lazy_top) / img_h
            bottom = (y_offset + new_h) * (lazy_bottom - lazy_top) / img_h
            lazyop['crop_bbox'] = np.array([(lazy_left + left),
                                            (lazy_top + top),
                                            (lazy_left + right),
                                            (lazy_top + bottom)],
                                           dtype=np.float32)

        # Entity boxes can only be cropped eagerly.
        if 'gt_bboxes' in results:
            assert not self.lazy
            results = self._all_box_crop(results, results['crop_bbox'])

        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}(size={self.size}, '
                    f'lazy={self.lazy})')
        return repr_str
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomResizedCrop(RandomCrop):
    """Random crop whose area and aspect ratio are sampled from ranges.

    Required keys in results are "img_shape", "imgs" (optional),
    "keypoint" (optional), added or modified keys are "imgs", "keypoint",
    "crop_bbox", "crop_quadruple", "img_shape" and "lazy"; Required keys in
    "lazy" are "flip", "crop_bbox", added or modified key is "crop_bbox".

    Args:
        area_range (Tuple[float]): The candidate area scales range of
            output cropped images. Default: (0.08, 1.0).
        aspect_ratio_range (Tuple[float]): The candidate aspect ratio range of
            output cropped images. Default: (3 / 4, 4 / 3).
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self,
                 area_range=(0.08, 1.0),
                 aspect_ratio_range=(3 / 4, 4 / 3),
                 lazy=False):
        self.area_range = area_range
        self.aspect_ratio_range = aspect_ratio_range
        self.lazy = lazy

        area_ok = mmengine.is_tuple_of(self.area_range, float)
        if not area_ok:
            raise TypeError('Area_range must be a tuple of float, '
                            f'but got {type(area_range)}')
        ratio_ok = mmengine.is_tuple_of(self.aspect_ratio_range, float)
        if not ratio_ok:
            raise TypeError('Aspect_ratio_range must be a tuple of float, '
                            f'but got {type(aspect_ratio_range)}')

    @staticmethod
    def get_crop_bbox(img_shape,
                      area_range,
                      aspect_ratio_range,
                      max_attempts=10):
        """Sample a crop bbox from the given area and aspect ratio ranges.

        Args:
            img_shape (Tuple[int]): Image shape as (height, width).
            area_range (Tuple[float]): The candidate area scales range of
                output cropped images.
            aspect_ratio_range (Tuple[float]): The candidate aspect
                ratio range of output cropped images.
            max_attempts (int): Number of random candidate boxes to
                generate. If none of them fits inside the image, the
                centered square box is used instead. Default: 10.

        Returns:
            tuple: ``(x1, y1, x2, y2)`` of the selected crop.
        """
        assert 0 < area_range[0] <= area_range[1] <= 1
        assert 0 < aspect_ratio_range[0] <= aspect_ratio_range[1]

        img_h, img_w = img_shape
        area = img_h * img_w

        # Aspect ratios are drawn uniformly in log space.
        min_ar, max_ar = aspect_ratio_range
        log_ratios = np.random.uniform(
            np.log(min_ar), np.log(max_ar), size=max_attempts)
        aspect_ratios = np.exp(log_ratios)
        target_areas = np.random.uniform(*area_range, size=max_attempts) * area

        widths = np.sqrt(target_areas * aspect_ratios)
        heights = np.sqrt(target_areas / aspect_ratios)
        candidate_crop_w = np.round(widths).astype(np.int32)
        candidate_crop_h = np.round(heights).astype(np.int32)

        # Take the first candidate that fits into the image.
        for crop_w, crop_h in zip(candidate_crop_w, candidate_crop_h):
            if crop_h > img_h or crop_w > img_w:
                continue
            x_offset = random.randint(0, img_w - crop_w)
            y_offset = random.randint(0, img_h - crop_h)
            return x_offset, y_offset, x_offset + crop_w, y_offset + crop_h

        # Fallback: the largest centered square.
        crop_size = min(img_h, img_w)
        x_offset = (img_w - crop_size) // 2
        y_offset = (img_h - crop_size) // 2
        return x_offset, y_offset, x_offset + crop_size, y_offset + crop_size

    def transform(self, results):
        """Performs the RandomResizeCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The updated ``results``.
        """
        _init_lazy_if_proper(results, self.lazy)
        if 'keypoint' in results:
            kp_msg = ('Keypoint Augmentations are not compatible '
                      'with lazy == True')
            assert not self.lazy, kp_msg

        img_h, img_w = results['img_shape']

        left, top, right, bottom = self.get_crop_bbox(
            (img_h, img_w), self.area_range, self.aspect_ratio_range)
        new_h, new_w = bottom - top, right - left

        # Identity quadruple (x, y, w, h) when no crop was recorded yet.
        results.setdefault('crop_quadruple',
                           np.array([0, 0, 1, 1], dtype=np.float32))

        x_ratio, y_ratio = left / img_w, top / img_h
        w_ratio, h_ratio = new_w / img_w, new_h / img_h

        prev = results['crop_quadruple']
        prev_x, prev_y = prev[0], prev[1]
        prev_w, prev_h = prev[2], prev[3]
        combined = [
            prev_x + x_ratio * prev_w,
            prev_y + y_ratio * prev_h,
            w_ratio * prev_w,
            h_ratio * prev_h,
        ]
        results['crop_quadruple'] = np.array(combined, dtype=np.float32)

        crop_bbox = np.array([left, top, right, bottom])
        results['crop_bbox'] = crop_bbox
        results['img_shape'] = (new_h, new_w)

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Re-express the crop in the frame of the pending lazy crop box.
            lazy_left, lazy_top, lazy_right, lazy_bottom = lazyop['crop_bbox']
            span_w = lazy_right - lazy_left
            span_h = lazy_bottom - lazy_top
            left = left * span_w / img_w
            right = right * span_w / img_w
            top = top * span_h / img_h
            bottom = bottom * span_h / img_h
            lazyop['crop_bbox'] = np.array(
                [lazy_left + left, lazy_top + top,
                 lazy_left + right, lazy_top + bottom],
                dtype=np.float32)
        else:
            if 'keypoint' in results:
                results['keypoint'] = self._crop_kps(results['keypoint'],
                                                     crop_bbox)
            if 'imgs' in results:
                results['imgs'] = self._crop_imgs(results['imgs'], crop_bbox)

        if 'gt_bboxes' in results:
            assert not self.lazy
            results = self._all_box_crop(results, results['crop_bbox'])

        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f'{cls_name}('
                f'area_range={self.area_range}, '
                f'aspect_ratio_range={self.aspect_ratio_range}, '
                f'lazy={self.lazy})')
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class MultiScaleCrop(RandomCrop):
    """Crop images with a list of randomly selected scales.

    Randomly select the w and h scales from a list of scales. Scale of 1 means
    the base size, which is the minimal of image width and height. The scale
    level of w and h is controlled to be smaller than a certain value to
    prevent too large or small aspect ratio.

    Required keys are "img_shape", "imgs" (optional), "keypoint" (optional),
    added or modified keys are "imgs", "keypoint", "crop_bbox",
    "crop_quadruple", "img_shape", "lazy" and "scales". Required keys in
    "lazy" are "flip" and "crop_bbox", added or modified key is "crop_bbox".

    Args:
        input_size (int | tuple[int]): (w, h) of network input.
        scales (tuple[float]): width and height scales to be selected.
        max_wh_scale_gap (int): Maximum gap of w and h scale levels.
            Default: 1.
        random_crop (bool): If set to True, the cropping bbox will be randomly
            sampled, otherwise it will be sampled from fixed regions.
            Default: False.
        num_fixed_crops (int): If set to 5, the cropping bbox will keep 5
            basic fixed regions: "upper left", "upper right", "lower left",
            "lower right", "center". If set to 13, the cropping bbox will
            append another 8 fix regions: "center left", "center right",
            "lower center", "upper center", "upper left quarter",
            "upper right quarter", "lower left quarter", "lower right quarter".
            Default: 5.
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self,
                 input_size,
                 scales=(1, ),
                 max_wh_scale_gap=1,
                 random_crop=False,
                 num_fixed_crops=5,
                 lazy=False):
        self.input_size = _pair(input_size)
        if not mmengine.is_tuple_of(self.input_size, int):
            raise TypeError('Input_size must be int or tuple of int, '
                            f'but got {type(input_size)}')

        if not isinstance(scales, tuple):
            raise TypeError(f'Scales must be tuple, but got {type(scales)}')

        supported_counts = [5, 13]
        if num_fixed_crops not in supported_counts:
            raise ValueError(f'Num_fix_crops must be in {supported_counts}, '
                             f'but got {num_fixed_crops}')

        self.scales = scales
        self.max_wh_scale_gap = max_wh_scale_gap
        self.random_crop = random_crop
        self.num_fixed_crops = num_fixed_crops
        self.lazy = lazy

    def transform(self, results):
        """Performs the MultiScaleCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The updated ``results``.
        """
        _init_lazy_if_proper(results, self.lazy)
        if 'keypoint' in results:
            kp_msg = ('Keypoint Augmentations are not compatible '
                      'with lazy == True')
            assert not self.lazy, kp_msg

        img_h, img_w = results['img_shape']
        base_size = min(img_h, img_w)
        crop_sizes = [int(base_size * scale) for scale in self.scales]

        # All (w, h) pairs whose scale levels are close enough to each other.
        candidate_sizes = [[w, h]
                           for i, h in enumerate(crop_sizes)
                           for j, w in enumerate(crop_sizes)
                           if abs(i - j) <= self.max_wh_scale_gap]

        chosen = random.choice(candidate_sizes)
        # Snap a side to the network input size when it is within 3 pixels.
        for axis in (0, 1):
            if abs(chosen[axis] - self.input_size[axis]) < 3:
                chosen[axis] = self.input_size[axis]

        crop_w, crop_h = chosen

        if self.random_crop:
            x_offset = random.randint(0, img_w - crop_w)
            y_offset = random.randint(0, img_h - crop_h)
        else:
            w_step = (img_w - crop_w) // 4
            h_step = (img_h - crop_h) // 4
            candidate_offsets = [
                (0, 0),  # upper left
                (4 * w_step, 0),  # upper right
                (0, 4 * h_step),  # lower left
                (4 * w_step, 4 * h_step),  # lower right
                (2 * w_step, 2 * h_step),  # center
            ]
            if self.num_fixed_crops == 13:
                candidate_offsets += [
                    (0, 2 * h_step),  # center left
                    (4 * w_step, 2 * h_step),  # center right
                    (2 * w_step, 4 * h_step),  # lower center
                    (2 * w_step, 0 * h_step),  # upper center
                    (1 * w_step, 1 * h_step),  # upper left quarter
                    (3 * w_step, 1 * h_step),  # upper right quarter
                    (1 * w_step, 3 * h_step),  # lower left quarter
                    (3 * w_step, 3 * h_step),  # lower right quarter
                ]
            x_offset, y_offset = random.choice(candidate_offsets)

        new_h, new_w = crop_h, crop_w

        crop_bbox = np.array(
            [x_offset, y_offset, x_offset + new_w, y_offset + new_h])
        results['crop_bbox'] = crop_bbox
        results['img_shape'] = (new_h, new_w)
        results['scales'] = self.scales

        # Identity quadruple (x, y, w, h) when no crop was recorded yet.
        results.setdefault('crop_quadruple',
                           np.array([0, 0, 1, 1], dtype=np.float32))

        x_ratio, y_ratio = x_offset / img_w, y_offset / img_h
        w_ratio, h_ratio = new_w / img_w, new_h / img_h

        prev = results['crop_quadruple']
        prev_x, prev_y = prev[0], prev[1]
        prev_w, prev_h = prev[2], prev[3]
        combined = [
            prev_x + x_ratio * prev_w,
            prev_y + y_ratio * prev_h,
            w_ratio * prev_w,
            h_ratio * prev_h,
        ]
        results['crop_quadruple'] = np.array(combined, dtype=np.float32)

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Re-express the crop in the frame of the pending lazy crop box.
            lazy_left, lazy_top, lazy_right, lazy_bottom = lazyop['crop_bbox']
            span_w = lazy_right - lazy_left
            span_h = lazy_bottom - lazy_top
            left = x_offset * span_w / img_w
            right = (x_offset + new_w) * span_w / img_w
            top = y_offset * span_h / img_h
            bottom = (y_offset + new_h) * span_h / img_h
            lazyop['crop_bbox'] = np.array(
                [lazy_left + left, lazy_top + top,
                 lazy_left + right, lazy_top + bottom],
                dtype=np.float32)
        else:
            if 'keypoint' in results:
                results['keypoint'] = self._crop_kps(results['keypoint'],
                                                     crop_bbox)
            if 'imgs' in results:
                results['imgs'] = self._crop_imgs(results['imgs'], crop_bbox)

        if 'gt_bboxes' in results:
            assert not self.lazy
            results = self._all_box_crop(results, results['crop_bbox'])

        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f'{cls_name}('
                f'input_size={self.input_size}, scales={self.scales}, '
                f'max_wh_scale_gap={self.max_wh_scale_gap}, '
                f'random_crop={self.random_crop}, '
                f'num_fixed_crops={self.num_fixed_crops}, '
                f'lazy={self.lazy})')
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class Resize(BaseTransform):
    """Resize images to a specific size.

    Required keys are "img_shape", "imgs" (optional), "keypoint"
    (optional), added or modified keys are "imgs", "keypoint", "img_shape",
    "keep_ratio", "scale_factor", "lazy", "gt_bboxes" (optional) and
    "proposals" (optional). Required key in "lazy" is "flip", added or
    modified key is "interpolation".

    Args:
        scale (float | Tuple[int]): If keep_ratio is True, it serves as scaling
            factor or maximum size:
            If it is a float number, the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, the image will
            be rescaled as large as possible within the scale.
            Otherwise, it serves as (w, h) of output size.
        keep_ratio (bool): If set to True, Images will be resized without
            changing the aspect ratio. Otherwise, it will resize images to a
            given size. Default: True.
        interpolation (str): Algorithm used for interpolation,
            accepted values are "nearest", "bilinear", "bicubic", "area",
            "lanczos". Default: "bilinear".
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self,
                 scale,
                 keep_ratio=True,
                 interpolation='bilinear',
                 lazy=False):
        if not isinstance(scale, (float, tuple)):
            raise TypeError(
                f'Scale must be float or tuple of int, but got {type(scale)}')

        if isinstance(scale, float):
            if scale <= 0:
                raise ValueError(f'Invalid scale {scale}, must be positive.')
        else:
            max_long_edge = max(scale)
            max_short_edge = min(scale)
            if max_short_edge == -1:
                # A -1 entry leaves that edge unconstrained: use np.inf for
                # it so that only the other edge bounds the rescaling.
                scale = (np.inf, max_long_edge)

        self.scale = scale
        self.keep_ratio = keep_ratio
        self.interpolation = interpolation
        self.lazy = lazy

    def _resize_imgs(self, imgs, new_w, new_h):
        """Resize every image to (new_w, new_h) with ``self.interpolation``."""
        target = (new_w, new_h)
        return [
            mmcv.imresize(img, target, interpolation=self.interpolation)
            for img in imgs
        ]

    @staticmethod
    def _resize_kps(kps, scale_factor):
        """Scale keypoint coordinates by ``scale_factor``."""
        return kps * scale_factor

    @staticmethod
    def _box_resize(box, scale_factor):
        """Rescale the bounding boxes according to the scale_factor.

        Args:
            box (np.ndarray): The bounding boxes.
            scale_factor (np.ndarray): The scale factor used for rescaling,
                two elements that are repeated to cover four box coordinates.
        """
        assert len(scale_factor) == 2
        per_coord = np.concatenate([scale_factor, scale_factor])
        return box * per_coord

    def transform(self, results):
        """Performs the Resize augmentation.

        Besides updating ``results``, the per-call (w, h) factor is stored on
        the instance as ``self.scale_factor``.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The updated ``results``.
        """
        _init_lazy_if_proper(results, self.lazy)
        if 'keypoint' in results:
            kp_msg = ('Keypoint Augmentations are not compatible '
                      'with lazy == True')
            assert not self.lazy, kp_msg

        # Start from the identity factor when nothing was resized before.
        results.setdefault('scale_factor', np.array([1, 1], dtype=np.float32))
        img_h, img_w = results['img_shape']

        if self.keep_ratio:
            new_w, new_h = mmcv.rescale_size((img_w, img_h), self.scale)
        else:
            new_w, new_h = self.scale

        self.scale_factor = np.array([new_w / img_w, new_h / img_h],
                                     dtype=np.float32)

        results['img_shape'] = (new_h, new_w)
        results['keep_ratio'] = self.keep_ratio
        # Accumulate with the factors of earlier resizes.
        results['scale_factor'] = results['scale_factor'] * self.scale_factor

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')
            lazyop['interpolation'] = self.interpolation
        else:
            if 'imgs' in results:
                results['imgs'] = self._resize_imgs(results['imgs'], new_w,
                                                    new_h)
            if 'keypoint' in results:
                results['keypoint'] = self._resize_kps(results['keypoint'],
                                                       self.scale_factor)

        if 'gt_bboxes' in results:
            assert not self.lazy
            results['gt_bboxes'] = self._box_resize(results['gt_bboxes'],
                                                    self.scale_factor)
            proposals = results['proposals'] if 'proposals' in results \
                else None
            if proposals is not None:
                assert proposals.shape[1] == 4
                results['proposals'] = self._box_resize(
                    proposals, self.scale_factor)

        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f'{cls_name}('
                f'scale={self.scale}, keep_ratio={self.keep_ratio}, '
                f'interpolation={self.interpolation}, '
                f'lazy={self.lazy})')
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomRescale(BaseTransform):
    """Randomly resize images so that the short_edge is resized to a specific
    size in a given range. The scale ratio is unchanged after resizing.

    The work is delegated to a non-lazy ``Resize`` with ``keep_ratio=True``,
    so the keys required and modified are those of ``Resize``; this
    transform additionally writes "short_edge".

    Args:
        scale_range (tuple[int]): The range of short edge length. A closed
            interval.
        interpolation (str): Algorithm used for interpolation:
            "nearest" | "bilinear". Default: "bilinear".
    """

    def __init__(self, scale_range, interpolation='bilinear'):
        self.scale_range = scale_range

        # Validate the range: a 2-tuple of strictly increasing positive ints.
        assert mmengine.is_tuple_of(scale_range, int)
        assert len(scale_range) == 2
        assert scale_range[0] < scale_range[1]
        assert np.all([edge > 0 for edge in scale_range])

        self.keep_ratio = True
        self.interpolation = interpolation

    def transform(self, results):
        """Performs the Resize augmentation with a random short edge.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The resized ``results`` with "short_edge" added.
        """
        lower = self.scale_range[0]
        upper = self.scale_range[1]
        # randint's upper bound is exclusive; +1 keeps the interval closed.
        short_edge = np.random.randint(lower, upper + 1)

        resizer = Resize((-1, short_edge),
                         keep_ratio=True,
                         interpolation=self.interpolation,
                         lazy=False)
        results = resizer(results)

        results['short_edge'] = short_edge
        return results

    def __repr__(self):
        lower, upper = self.scale_range[0], self.scale_range[1]
        cls_name = self.__class__.__name__
        return (f'{cls_name}('
                f'scale_range=({lower}, {upper}), '
                f'interpolation={self.interpolation})')
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
|
|
|
class Flip(BaseTransform):
|
|
|
"""Flip the input images with a probability.
|
|
|
|
|
|
Reverse the order of elements in the given imgs with a specific direction.
|
|
|
The shape of the imgs is preserved, but the elements are reordered.
|
|
|
|
|
|
Required keys are "img_shape", "modality", "imgs" (optional), "keypoint"
|
|
|
(optional), added or modified keys are "imgs", "keypoint", "lazy" and
|
|
|
"flip_direction". Required keys in "lazy" is None, added or modified key
|
|
|
are "flip" and "flip_direction". The Flip augmentation should be placed
|
|
|
after any cropping / reshaping augmentations, to make sure crop_quadruple
|
|
|
is calculated properly.
|
|
|
|
|
|
Args:
|
|
|
flip_ratio (float): Probability of implementing flip. Default: 0.5.
|
|
|
direction (str): Flip imgs horizontally or vertically. Options are
|
|
|
"horizontal" | "vertical". Default: "horizontal".
|
|
|
flip_label_map (Dict[int, int] | None): Transform the label of the
|
|
|
flipped image with the specific label. Default: None.
|
|
|
left_kp (list[int]): Indexes of left keypoints, used to flip keypoints.
|
|
|
Default: None.
|
|
|
right_kp (list[ind]): Indexes of right keypoints, used to flip
|
|
|
keypoints. Default: None.
|
|
|
lazy (bool): Determine whether to apply lazy operation. Default: False.
|
|
|
"""
|
|
|
_directions = ['horizontal', 'vertical']
|
|
|
|
|
|
def __init__(self,
|
|
|
flip_ratio=0.5,
|
|
|
direction='horizontal',
|
|
|
flip_label_map=None,
|
|
|
left_kp=None,
|
|
|
right_kp=None,
|
|
|
lazy=False):
|
|
|
if direction not in self._directions:
|
|
|
raise ValueError(f'Direction {direction} is not supported. '
|
|
|
f'Currently support ones are {self._directions}')
|
|
|
self.flip_ratio = flip_ratio
|
|
|
self.direction = direction
|
|
|
self.flip_label_map = flip_label_map
|
|
|
self.left_kp = left_kp
|
|
|
self.right_kp = right_kp
|
|
|
self.lazy = lazy
|
|
|
|
|
|
def _flip_imgs(self, imgs, modality):
|
|
|
"""Utility function for flipping images."""
|
|
|
_ = [mmcv.imflip_(img, self.direction) for img in imgs]
|
|
|
lt = len(imgs)
|
|
|
if modality == 'Flow':
|
|
|
|
|
|
for i in range(0, lt, 2):
|
|
|
imgs[i] = mmcv.iminvert(imgs[i])
|
|
|
return imgs
|
|
|
|
|
|
def _flip_kps(self, kps, kpscores, img_width):
|
|
|
"""Utility function for flipping keypoint."""
|
|
|
kp_x = kps[..., 0]
|
|
|
kp_x[kp_x != 0] = img_width - kp_x[kp_x != 0]
|
|
|
new_order = list(range(kps.shape[2]))
|
|
|
if self.left_kp is not None and self.right_kp is not None:
|
|
|
for left, right in zip(self.left_kp, self.right_kp):
|
|
|
new_order[left] = right
|
|
|
new_order[right] = left
|
|
|
kps = kps[:, :, new_order]
|
|
|
if kpscores is not None:
|
|
|
kpscores = kpscores[:, :, new_order]
|
|
|
return kps, kpscores
|
|
|
|
|
|
@staticmethod
|
|
|
def _box_flip(box, img_width):
|
|
|
"""Flip the bounding boxes given the width of the image.
|
|
|
|
|
|
Args:
|
|
|
box (np.ndarray): The bounding boxes.
|
|
|
img_width (int): The img width.
|
|
|
"""
|
|
|
box_ = box.copy()
|
|
|
box_[..., 0::4] = img_width - box[..., 2::4]
|
|
|
box_[..., 2::4] = img_width - box[..., 0::4]
|
|
|
return box_
|
|
|
|
|
|
def transform(self, results):
|
|
|
"""Performs the Flip augmentation.
|
|
|
|
|
|
Args:
|
|
|
results (dict): The resulting dict to be modified and passed
|
|
|
to the next transform in pipeline.
|
|
|
"""
|
|
|
_init_lazy_if_proper(results, self.lazy)
|
|
|
if 'keypoint' in results:
|
|
|
assert not self.lazy, ('Keypoint Augmentations are not compatible '
|
|
|
'with lazy == True')
|
|
|
assert self.direction == 'horizontal', (
|
|
|
'Only horizontal flips are'
|
|
|
'supported for human keypoints')
|
|
|
|
|
|
modality = results['modality']
|
|
|
if modality == 'Flow':
|
|
|
assert self.direction == 'horizontal'
|
|
|
|
|
|
flip = np.random.rand() < self.flip_ratio
|
|
|
|
|
|
results['flip'] = flip
|
|
|
results['flip_direction'] = self.direction
|
|
|
img_width = results['img_shape'][1]
|
|
|
|
|
|
if self.flip_label_map is not None and flip:
|
|
|
results['label'] = self.flip_label_map.get(results['label'],
|
|
|
results['label'])
|
|
|
|
|
|
if not self.lazy:
|
|
|
if flip:
|
|
|
if 'imgs' in results:
|
|
|
results['imgs'] = self._flip_imgs(results['imgs'],
|
|
|
modality)
|
|
|
if 'keypoint' in results:
|
|
|
kp = results['keypoint']
|
|
|
kpscore = results.get('keypoint_score', None)
|
|
|
kp, kpscore = self._flip_kps(kp, kpscore, img_width)
|
|
|
results['keypoint'] = kp
|
|
|
if 'keypoint_score' in results:
|
|
|
results['keypoint_score'] = kpscore
|
|
|
else:
|
|
|
lazyop = results['lazy']
|
|
|
if lazyop['flip']:
|
|
|
raise NotImplementedError('Use one Flip please')
|
|
|
lazyop['flip'] = flip
|
|
|
lazyop['flip_direction'] = self.direction
|
|
|
|
|
|
if 'gt_bboxes' in results and flip:
|
|
|
assert not self.lazy and self.direction == 'horizontal'
|
|
|
width = results['img_shape'][1]
|
|
|
results['gt_bboxes'] = self._box_flip(results['gt_bboxes'], width)
|
|
|
if 'proposals' in results and results['proposals'] is not None:
|
|
|
assert results['proposals'].shape[1] == 4
|
|
|
results['proposals'] = self._box_flip(results['proposals'],
|
|
|
width)
|
|
|
|
|
|
return results
|
|
|
|
|
|
def __repr__(self):
    """Return the class name followed by the configured flip settings."""
    cls_name = self.__class__.__name__
    return (f'{cls_name}('
            f'flip_ratio={self.flip_ratio}, direction={self.direction}, '
            f'flip_label_map={self.flip_label_map}, lazy={self.lazy})')
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class ColorJitter(BaseTransform):
    """Perform ColorJitter to each img.

    Required keys are "imgs", added or modified keys are "imgs".

    One brightness, contrast, saturation and hue factor is sampled per call
    and applied to every image in "imgs". The order in which the four
    adjustments are applied is a random permutation drawn once, when the
    transform is constructed.

    Args:
        brightness (float | tuple[float]): The jitter range for brightness, if
            set as a float, the range will be (1 - brightness, 1 + brightness).
            Default: 0.5.
        contrast (float | tuple[float]): The jitter range for contrast, if set
            as a float, the range will be (1 - contrast, 1 + contrast).
            Default: 0.5.
        saturation (float | tuple[float]): The jitter range for saturation, if
            set as a float, the range will be (1 - saturation, 1 + saturation).
            Default: 0.5.
        hue (float | tuple[float]): The jitter range for hue, if set as a
            float, the range will be (-hue, hue). A factor of 0.5 shifts the
            hue by half of the hue circle. Default: 0.1.
    """

    @staticmethod
    def check_input(val, max, base):
        """Convert a jitter setting into a ``(low, high)`` sampling range.

        Args:
            val (float | tuple[float] | list[float]): A single magnitude, or
                an explicit ``(low, high)`` pair.
            max (float): Largest allowed distance from ``base``.
            base (float): Centre of the range built from a scalar ``val``.

        Returns:
            tuple[float]: The pair itself when ``val`` is a tuple or list,
            otherwise ``(base - val, base + val)``.
        """
        # Lists are accepted as well as tuples because ranges loaded from
        # config files frequently arrive as lists.
        if isinstance(val, (tuple, list)):
            assert base - max <= val[0] <= val[1] <= base + max
            return tuple(val)
        assert val <= max
        return (base - val, base + val)

    @staticmethod
    def rgb_to_grayscale(img):
        """Return the weighted sum of the first three channels of ``img``."""
        return 0.2989 * img[..., 0] + 0.587 * img[..., 1] + 0.114 * img[..., 2]

    @staticmethod
    def adjust_contrast(img, factor):
        """Blend ``img`` with the mean of its grayscale version."""
        val = np.mean(ColorJitter.rgb_to_grayscale(img))
        return factor * img + (1 - factor) * val

    @staticmethod
    def adjust_saturation(img, factor):
        """Blend ``img`` with its 3-channel grayscale version."""
        gray = np.stack([ColorJitter.rgb_to_grayscale(img)] * 3, axis=-1)
        return factor * img + (1 - factor) * gray

    @staticmethod
    def adjust_hue(img, factor):
        """Rotate the hue channel of ``img`` by ``factor`` of a full turn.

        Args:
            img (np.ndarray): Image to adjust; it is clipped to [0, 255] and
                converted to ``uint8`` before the colour-space conversion.
            factor (float): Signed fraction of the hue circle to shift by.

        Returns:
            np.ndarray: The adjusted image as ``float32``.
        """
        img = np.clip(img, 0, 255).astype(np.uint8)
        hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
        # For 8-bit images OpenCV stores hue in [0, 180), so a full turn of
        # the hue circle is 180 units, not 255.
        offset = int(factor * 180)
        # Do the shift in a signed, wider dtype: adding to the uint8 channel
        # directly can wrap around for positive offsets and cannot represent
        # negative offsets at all.
        hue = hsv[..., 0].astype(np.int16)
        hsv[..., 0] = ((hue + offset) % 180).astype(np.uint8)
        img = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
        return img.astype(np.float32)

    def __init__(self, brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1):
        self.brightness = self.check_input(brightness, 1, 1)
        self.contrast = self.check_input(contrast, 1, 1)
        self.saturation = self.check_input(saturation, 1, 1)
        self.hue = self.check_input(hue, 0.5, 0)
        # Order of the four adjustments; fixed for the lifetime of the object.
        self.fn_idx = np.random.permutation(4)

    def transform(self, results):
        """Perform ColorJitter.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "imgs" replaced by a new list of jittered
            ``uint8`` images.
        """
        imgs = results['imgs']

        # Sample the factors once so that all images share the same jitter.
        b = np.random.uniform(
            low=self.brightness[0], high=self.brightness[1])
        c = np.random.uniform(low=self.contrast[0], high=self.contrast[1])
        s = np.random.uniform(
            low=self.saturation[0], high=self.saturation[1])
        h = np.random.uniform(low=self.hue[0], high=self.hue[1])

        new_imgs = []
        for img in imgs:
            # astype() copies, so the in-place brightness scaling below does
            # not touch the caller's array.
            img = img.astype(np.float32)
            for fn_id in self.fn_idx:
                if fn_id == 0 and b != 1:
                    img *= b
                elif fn_id == 1 and c != 1:
                    img = self.adjust_contrast(img, c)
                elif fn_id == 2 and s != 1:
                    img = self.adjust_saturation(img, s)
                elif fn_id == 3 and h != 0:
                    img = self.adjust_hue(img, h)
            new_imgs.append(np.clip(img, 0, 255).astype(np.uint8))

        results['imgs'] = new_imgs
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'brightness={self.brightness}, '
                    f'contrast={self.contrast}, '
                    f'saturation={self.saturation}, '
                    f'hue={self.hue})')
        return repr_str
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class CenterCrop(RandomCrop):
    """Crop a centered window of a fixed size from the images.

    Required keys are "img_shape", "imgs" (optional), "keypoint" (optional),
    added or modified keys are "imgs", "keypoint", "crop_bbox",
    "crop_quadruple", "lazy" and "img_shape". In lazy mode the "crop_bbox"
    entry of "lazy" is read and rewritten instead of cropping the images.

    Args:
        crop_size (int | tuple[int]): (w, h) of crop size.
        lazy (bool): Determine whether to apply lazy operation. Default: False.
    """

    def __init__(self, crop_size, lazy=False):
        self.crop_size = _pair(crop_size)
        self.lazy = lazy
        size_is_valid = mmengine.is_tuple_of(self.crop_size, int)
        if not size_is_valid:
            raise TypeError(f'Crop_size must be int or tuple of int, '
                            f'but got {type(crop_size)}')

    def transform(self, results):
        """Performs the CenterCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The updated ``results``.
        """
        _init_lazy_if_proper(results, self.lazy)
        assert not ('keypoint' in results and self.lazy), (
            'Keypoint Augmentations are not compatible '
            'with lazy == True')

        height, width = results['img_shape']
        crop_w, crop_h = self.crop_size

        # Corners of the centered window, in pixels of the current image.
        x0 = (width - crop_w) // 2
        y0 = (height - crop_h) // 2
        x1 = x0 + crop_w
        y1 = y0 + crop_h
        out_h, out_w = y1 - y0, x1 - x0

        window = np.array([x0, y0, x1, y1])
        results['crop_bbox'] = window
        results['img_shape'] = (out_h, out_w)

        # Accumulated crop as (x, y, w, h) fractions; starts as the identity
        # quadruple when no earlier transform has stored one.
        previous = results.setdefault(
            'crop_quadruple', np.array([0, 0, 1, 1], dtype=np.float32))
        current = (x0 / width, y0 / height, out_w / width, out_h / height)
        results['crop_quadruple'] = np.array(
            _combine_quadruple(previous, current), dtype=np.float32)

        if self.lazy:
            lazyop = results['lazy']
            if lazyop['flip']:
                raise NotImplementedError('Put Flip at last for now')

            # Rescale the window by the extent of the pending lazy crop and
            # store it back, offset by that crop's top-left corner.
            lazy_left, lazy_top, lazy_right, lazy_bottom = lazyop['crop_bbox']
            lazy_w = lazy_right - lazy_left
            lazy_h = lazy_bottom - lazy_top
            x0 = x0 * lazy_w / width
            x1 = x1 * lazy_w / width
            y0 = y0 * lazy_h / height
            y1 = y1 * lazy_h / height
            lazyop['crop_bbox'] = np.array(
                [lazy_left + x0, lazy_top + y0,
                 lazy_left + x1, lazy_top + y1],
                dtype=np.float32)
        else:
            if 'keypoint' in results:
                results['keypoint'] = self._crop_kps(results['keypoint'],
                                                     window)
            if 'imgs' in results:
                results['imgs'] = self._crop_imgs(results['imgs'], window)

        if 'gt_bboxes' in results:
            assert not self.lazy
            results = self._all_box_crop(results, results['crop_bbox'])

        return results

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f'{cls_name}(crop_size={self.crop_size}, '
                f'lazy={self.lazy})')
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class ThreeCrop(BaseTransform):
    """Crop images into three crops.

    The crop size must match the image in height or in width. Three crops
    are taken at the start, the end and the middle of the other dimension,
    and are stored in that order.
    Required keys are "imgs", "img_shape", added or modified keys are "imgs",
    "crop_bbox" and "img_shape".

    Args:
        crop_size(int | tuple[int]): (w, h) of crop size.
    """

    def __init__(self, crop_size):
        self.crop_size = _pair(crop_size)
        size_is_valid = mmengine.is_tuple_of(self.crop_size, int)
        if not size_is_valid:
            raise TypeError(f'Crop_size must be int or tuple of int, '
                            f'but got {type(crop_size)}')

    def transform(self, results):
        """Performs the ThreeCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` whose "imgs" list holds the three groups of
            crops one after another, with one "crop_bbox" row per crop.
        """
        _init_lazy_if_proper(results, False)
        if 'gt_bboxes' in results or 'proposals' in results:
            warnings.warn('ThreeCrop cannot process bounding boxes')

        imgs = results['imgs']
        img_h, img_w = results['imgs'][0].shape[:2]
        crop_w, crop_h = self.crop_size
        assert crop_h == img_h or crop_w == img_w

        # Offsets are (x, y) of each crop's top-left corner.
        if crop_h == img_h:
            step = (img_w - crop_w) // 2
            offsets = [(dx, 0) for dx in (0, 2 * step, step)]
        elif crop_w == img_w:
            step = (img_h - crop_h) // 2
            offsets = [(0, dy) for dy in (0, 2 * step, step)]

        num_imgs = len(imgs)
        cropped = []
        crop_bboxes = []
        for left, top in offsets:
            right, bottom = left + crop_w, top + crop_h
            cropped.extend([img[top:bottom, left:right] for img in imgs])
            crop_bboxes.extend([[left, top, right, bottom]] * num_imgs)

        results['imgs'] = cropped
        results['crop_bbox'] = np.array(crop_bboxes)
        results['img_shape'] = results['imgs'][0].shape[:2]

        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(crop_size={self.crop_size})'
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class TenCrop(BaseTransform):
    """Crop the images into 10 crops (corner + center + flip).

    Five windows of the given crop_size are taken (four corners and the
    center), and each window is also stored flipped along its width axis.
    Required keys are "imgs", "img_shape", added or modified keys are "imgs",
    "crop_bbox" and "img_shape".

    Args:
        crop_size(int | tuple[int]): (w, h) of crop size.
    """

    def __init__(self, crop_size):
        self.crop_size = _pair(crop_size)
        size_is_valid = mmengine.is_tuple_of(self.crop_size, int)
        if not size_is_valid:
            raise TypeError(f'Crop_size must be int or tuple of int, '
                            f'but got {type(crop_size)}')

    def transform(self, results):
        """Performs the TenCrop augmentation.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` where, for every window, "imgs" holds the plain
            crops followed by their flipped copies, with one "crop_bbox" row
            per stored crop.
        """
        _init_lazy_if_proper(results, False)

        if 'gt_bboxes' in results or 'proposals' in results:
            warnings.warn('TenCrop cannot process bounding boxes')

        imgs = results['imgs']

        img_h, img_w = results['imgs'][0].shape[:2]
        crop_w, crop_h = self.crop_size

        w_step = (img_w - crop_w) // 4
        h_step = (img_h - crop_h) // 4

        # (x, y) of each window's top-left corner: the four corners in
        # row-major order, then the center.
        far_x, far_y = 4 * w_step, 4 * h_step
        offsets = [
            (0, 0),
            (far_x, 0),
            (0, far_y),
            (far_x, far_y),
            (2 * w_step, 2 * h_step),
        ]

        # Every window contributes its plain crops plus their flips.
        boxes_per_window = len(imgs) * 2
        img_crops = []
        crop_bboxes = []
        for left, top in offsets:
            right, bottom = left + crop_w, top + crop_h
            plain = [img[top:bottom, left:right] for img in imgs]
            mirrored = [np.flip(patch, axis=1).copy() for patch in plain]
            img_crops.extend(plain)
            img_crops.extend(mirrored)
            crop_bboxes.extend(
                [[left, top, right, bottom]] * boxes_per_window)

        results['imgs'] = img_crops
        results['crop_bbox'] = np.array(crop_bboxes)
        results['img_shape'] = results['imgs'][0].shape[:2]

        return results

    def __repr__(self):
        return f'{self.__class__.__name__}(crop_size={self.crop_size})'
|
|
|
|
|
|
|
|
|
@TRANSFORMS.register_module()
class RandomErasing(BaseTransform):
    """Randomly selects a rectangle region in an image and erase pixels.
    basically refer mmcls.

    The same rectangle is erased in every image of "imgs"; the images are
    written to in place.

    **Required Keys:**

    - imgs

    **Modified Keys:**

    - imgs

    Args:
        erase_prob (float): Probability that image will be randomly erased.
            Default: 0.5
        min_area_ratio (float): Minimum erased area / input image area
            Default: 0.02
        max_area_ratio (float): Maximum erased area / input image area
            Default: 1/3
        aspect_range (sequence | float): Aspect ratio range of erased area.
            if float, it will be converted to (aspect_ratio, 1/aspect_ratio)
            Default: (3/10, 10/3)
        mode (str): Fill method in erased area, can be:

            - const (default): All pixels are assign with the same value.
            - rand: each pixel is assigned with a random value in [0, 255]

        fill_color (sequence | Number): Base color filled in erased area.
            Defaults to (128, 128, 128).
        fill_std (sequence | Number, optional): If set and ``mode`` is 'rand',
            fill erased area with random color from normal distribution
            (mean=fill_color, std=fill_std); If not set, fill erased area with
            random color from uniform distribution (0~255). Defaults to None.

    Note:
        See `Random Erasing Data Augmentation
        <https://arxiv.org/pdf/1708.04896.pdf>`_

        This paper provided 4 modes: RE-R, RE-M, RE-0, RE-255, and use RE-M as
        default. The config of these 4 modes are:

        - RE-R: RandomErasing(mode='rand')
        - RE-M: RandomErasing(mode='const', fill_color=(123.67, 116.3, 103.5))
        - RE-0: RandomErasing(mode='const', fill_color=0)
        - RE-255: RandomErasing(mode='const', fill_color=255)
    """

    def __init__(self,
                 erase_prob=0.5,
                 min_area_ratio=0.02,
                 max_area_ratio=1 / 3,
                 aspect_range=(3 / 10, 10 / 3),
                 mode='const',
                 fill_color=(128, 128, 128),
                 fill_std=None):
        assert isinstance(erase_prob, float) and 0. <= erase_prob <= 1.
        assert isinstance(min_area_ratio, float) and 0. <= min_area_ratio <= 1.
        assert isinstance(max_area_ratio, float) and 0. <= max_area_ratio <= 1.
        assert min_area_ratio <= max_area_ratio, \
            'min_area_ratio should be smaller than max_area_ratio'
        if isinstance(aspect_range, float):
            # A single ratio r is expanded to the symmetric range
            # (min(r, 1/r), max(r, 1/r)).
            aspect_range = min(aspect_range, 1 / aspect_range)
            aspect_range = (aspect_range, 1 / aspect_range)
        assert isinstance(aspect_range, Sequence) and len(aspect_range) == 2 \
            and all(isinstance(x, float) for x in aspect_range), \
            'aspect_range should be a float or Sequence with two float.'
        assert all(x > 0 for x in aspect_range), \
            'aspect_range should be positive.'
        assert aspect_range[0] <= aspect_range[1], \
            'In aspect_range (min, max), min should be smaller than max.'
        assert mode in ['const', 'rand'], \
            'Please select `mode` from ["const", "rand"].'
        if isinstance(fill_color, Number):
            fill_color = [fill_color] * 3
        assert isinstance(fill_color, Sequence) and len(fill_color) == 3 \
            and all(isinstance(x, Number) for x in fill_color), \
            'fill_color should be a float or Sequence with three int.'
        if fill_std is not None:
            if isinstance(fill_std, Number):
                fill_std = [fill_std] * 3
            assert isinstance(fill_std, Sequence) and len(fill_std) == 3 \
                and all(isinstance(x, Number) for x in fill_std), \
                'fill_std should be a float or Sequence with three int.'

        self.erase_prob = erase_prob
        self.min_area_ratio = min_area_ratio
        self.max_area_ratio = max_area_ratio
        self.aspect_range = aspect_range
        self.mode = mode
        self.fill_color = fill_color
        self.fill_std = fill_std

    def _img_fill_pixels(self, img, top, left, h, w):
        """Fill pixels to the patch of image.

        The ``h`` x ``w`` region whose top-left corner is ``(top, left)`` is
        overwritten in ``img`` itself, and ``img`` is returned.
        """
        if self.mode == 'const':
            patch = np.empty((h, w, 3), dtype=np.uint8)
            patch[:, :] = np.array(self.fill_color, dtype=np.uint8)
        elif self.fill_std is None:
            # Uniform noise over the full 8-bit range.
            patch = np.random.uniform(0, 256, (h, w, 3)).astype(np.uint8)
        else:
            # Gaussian noise around fill_color, clipped to the 8-bit range.
            patch = np.random.normal(self.fill_color, self.fill_std, (h, w, 3))
            patch = np.clip(patch.astype(np.int32), 0, 255).astype(np.uint8)

        img[top:top + h, left:left + w] = patch
        return img

    def _fill_pixels(self, imgs, top, left, h, w):
        """Fill pixels to the patch of each image in frame clip."""
        return [self._img_fill_pixels(img, top, left, h, w) for img in imgs]

    @cache_randomness
    def random_disable(self):
        """Randomly disable the transform."""
        return np.random.rand() > self.erase_prob

    @cache_randomness
    def random_patch(self, img_h, img_w):
        """Randomly generate patch the erase.

        Args:
            img_h (int): Height of the images.
            img_w (int): Width of the images.

        Returns:
            tuple[int]: ``(top, left, h, w)`` of the patch, which always lies
            inside the image.
        """
        # Sample the aspect ratio uniformly in log space.
        log_aspect_range = np.log(
            np.array(self.aspect_range, dtype=np.float32))
        aspect_ratio = np.exp(np.random.uniform(*log_aspect_range))
        area = img_h * img_w
        area *= np.random.uniform(self.min_area_ratio, self.max_area_ratio)

        h = min(int(round(np.sqrt(area * aspect_ratio))), img_h)
        w = min(int(round(np.sqrt(area / aspect_ratio))), img_w)
        # randint's upper bound is exclusive, so ``+ 1`` is needed for the
        # patch to be able to reach the bottom and right image borders.
        top = np.random.randint(0, img_h - h + 1) if img_h > h else 0
        left = np.random.randint(0, img_w - w + 1) if img_w > w else 0
        return top, left, h, w

    def transform(self, results):
        """Erase one random patch from every image in "imgs".

        Args:
            results (dict): Results dict from pipeline

        Returns:
            dict: Results after the transformation.
        """
        if self.random_disable():
            return results

        imgs = results['imgs']
        img_h, img_w = imgs[0].shape[:2]

        # One patch location is drawn and reused for all images.
        imgs = self._fill_pixels(imgs, *self.random_patch(img_h, img_w))

        results['imgs'] = imgs

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(erase_prob={self.erase_prob}, '
        repr_str += f'min_area_ratio={self.min_area_ratio}, '
        repr_str += f'max_area_ratio={self.max_area_ratio}, '
        repr_str += f'aspect_range={self.aspect_range}, '
        repr_str += f'mode={self.mode}, '
        repr_str += f'fill_color={self.fill_color}, '
        repr_str += f'fill_std={self.fill_std})'
        return repr_str
|
|
|
|