|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
|
|
|
try: |
|
|
from collections.abc import Sequence |
|
|
except Exception: |
|
|
from collections import Sequence |
|
|
|
|
|
import cv2 |
|
|
import numpy as np |
|
|
import math |
|
|
import copy |
|
|
|
|
|
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix |
|
|
from ppdet.core.workspace import serializable |
|
|
from ppdet.utils.logger import setup_logger |
|
|
logger = setup_logger(__name__) |
|
|
|
|
|
registered_ops = [] |
|
|
|
|
|
__all__ = [ |
|
|
'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps', |
|
|
'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform', |
|
|
'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK', |
|
|
'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine', |
|
|
'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter', |
|
|
'FlipPose', 'PETR_Resize' |
|
|
] |
|
|
|
|
|
|
|
|
def register_keypointop(cls):
    """Register a keypoint data-transform op for config-driven creation.

    Wraps ``cls`` with ppdet's ``serializable`` so the op can be
    instantiated from YAML configs; returns the wrapped class.
    """
    serializable_cls = serializable(cls)
    return serializable_cls
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class KeyPointFlip(object):
    """Randomly mirror the image horizontally and update its annotations.

    When a flip happens, left/right keypoints must trade places (the right
    knee becomes the left knee in the mirrored image), which is what
    ``flip_permutation`` encodes, and every x-coordinate is mirrored.

    Args:
        flip_permutation (list[17]): left-right exchange order corresponding
            to joints [0, 1, 2, ..., 16]
        hmsize (list[2]): heatmap widths of the multi-scale outputs of
            higherhrnet; when None, the image width is used instead
        flip_prob (float): probability of flipping the image
        records(dict): the dict containing the image, mask and coords

    Returns:
        records(dict): the image, mask and coords after the transform
    """

    def __init__(self, flip_permutation, hmsize=None, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize

    def _flipjoints(self, records, sizelst):
        """Swap left/right joints and mirror x-coords at every scale.

        ``records['gt_joints']`` is a Sequence (one array per output scale)
        in higherhrnet, otherwise a single array.
        """
        if 'gt_joints' not in records or len(records['gt_joints']) == 0:
            return records

        joints = records['gt_joints']
        if isinstance(joints, Sequence):
            for scale_idx, width in enumerate(sizelst):
                scale_joints = joints[scale_idx]
                # ndim==3 means (people, joints, 3); permute the joint axis
                if scale_joints.ndim == 3:
                    scale_joints = scale_joints[:, self.flip_permutation]
                else:
                    scale_joints = scale_joints[self.flip_permutation]
                scale_joints[..., 0] = width - scale_joints[..., 0]
                joints[scale_idx] = scale_joints
        else:
            width = sizelst[0]
            if joints.ndim == 3:
                joints = joints[:, self.flip_permutation]
            else:
                joints = joints[self.flip_permutation]
            joints[..., 0] = width - joints[..., 0]

        records['gt_joints'] = joints
        return records

    def _flipmask(self, records, sizelst):
        """Mirror each per-scale mask along its x axis."""
        if 'mask' not in records:
            return records

        masks = records['mask']
        for scale_idx in range(min(len(sizelst), len(masks))):
            masks[scale_idx] = masks[scale_idx][:, ::-1]
        records['mask'] = masks
        return records

    def _flipbbox(self, records, sizelst):
        """Mirror bbox x-coords (x1/x2 are swapped so x1 <= x2 holds)."""
        if 'gt_bbox' not in records:
            return records

        width = sizelst[0]
        boxes = records['gt_bbox']
        boxes[:, 0::2] = width - boxes[:, 0::2][:, ::-1]
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, width)
        records['gt_bbox'] = boxes
        return records

    def __call__(self, records):
        if np.random.random() >= self.flip_prob:
            return records

        image = records['image'][:, ::-1]
        records['image'] = image
        # without explicit heatmap sizes, mirror in image coordinates
        sizelst = [image.shape[1]] if self.hmsize is None else self.hmsize
        self._flipjoints(records, sizelst)
        self._flipmask(records, sizelst)
        self._flipbbox(records, sizelst)

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class RandomAffine(object):
    """apply affine transform to image, mask and coords
    to achieve the rotate, scale and shift effect for training image

    Args:
        max_degree (float): the max abslute rotate degree to apply, transform range is [-max_degree, max_degree]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        trainsize (list[2]): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
        scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
        boldervalue (list[3]): the border (padding) color used by warpAffine
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the image, mask and coords after tranformed

    """

    def __init__(self,
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=None,
                 trainsize=[512, 512],
                 scale_type='short',
                 boldervalue=[114, 114, 114]):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
        self.max_scale = scale[1]
        self.max_shift = max_shift
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type
        self.boldervalue = boldervalue

    def _get_affine_matrix_old(self, center, scale, res, rot=0):
        """Generate transformation matrix (legacy variant using a single
        scalar scale for both axes; kept for back-compat/reference)."""
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[1]) / h
        t[1, 1] = float(res[0]) / h
        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1

            # rotate around the output center: translate, rotate, translate back
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[1] / 2
            t_mat[1, 2] = -res[0] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def _get_affine_matrix(self, center, scale, res, rot=0):
        """Generate a 3x3 transformation matrix mapping an roi of size
        ``scale`` ([w, h]) centered at ``center`` to an output of size
        ``res`` ([w, h]), optionally rotated by ``rot`` degrees."""
        w, h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[0]) / w
        t[1, 1] = float(res[1]) / h
        t[0, 2] = res[0] * (-float(center[0]) / w + .5)
        t[1, 2] = res[1] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1

            # rotate around the output center: translate, rotate, translate back
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[0] / 2
            t_mat[1, 2] = -res[1] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def _affine_joints_mask(self,
                            degree,
                            center,
                            roi_size,
                            dsize,
                            keypoints=None,
                            heatmap_mask=None,
                            gt_bbox=None):
        """Apply the same affine warp to keypoints, mask and boxes.

        Returns a (kpts, mask, bbox) triple; entries are None when the
        corresponding input was None.
        """
        kpts = None
        mask = None
        bbox = None
        mask_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
                                                  degree)[:2]
        if heatmap_mask is not None:
            mask = cv2.warpAffine(heatmap_mask, mask_affine_mat, dsize)
            # re-binarize: interpolation produces intermediate values
            mask = ((mask / 255) > 0.5).astype(np.float32)
        if keypoints is not None:
            kpts = copy.deepcopy(keypoints)
            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
                                                mask_affine_mat)
            # zero out keypoints warped outside the output canvas
            kpts[(kpts[..., 0]) > dsize[0], :] = 0
            kpts[(kpts[..., 1]) > dsize[1], :] = 0
            kpts[(kpts[..., 0]) < 0, :] = 0
            kpts[(kpts[..., 1]) < 0, :] = 0
        if gt_bbox is not None:
            # warp all four corners, then take the axis-aligned hull
            temp_bbox = gt_bbox[:, [0, 3, 2, 1]]
            cat_bbox = np.concatenate((gt_bbox, temp_bbox), axis=-1)
            gt_bbox_warped = warp_affine_joints(cat_bbox, mask_affine_mat)
            bbox = np.zeros_like(gt_bbox)
            bbox[:, 0] = gt_bbox_warped[:, 0::2].min(1).clip(0, dsize[0])
            bbox[:, 2] = gt_bbox_warped[:, 0::2].max(1).clip(0, dsize[0])
            bbox[:, 1] = gt_bbox_warped[:, 1::2].min(1).clip(0, dsize[1])
            bbox[:, 3] = gt_bbox_warped[:, 1::2].max(1).clip(0, dsize[1])
        return kpts, mask, bbox

    def __call__(self, records):
        image = records['image']
        shape = np.array(image.shape[:2][::-1])  # [w, h]
        keypoints = None
        heatmap_mask = None
        gt_bbox = None
        if 'gt_joints' in records:
            keypoints = records['gt_joints']

        if 'mask' in records:
            heatmap_mask = records['mask']
            # scale to 0/255 so warpAffine interpolation is meaningful
            heatmap_mask *= 255

        if 'gt_bbox' in records:
            gt_bbox = records['gt_bbox']

        degree = (np.random.random() * 2 - 1) * self.max_degree
        center = np.array(shape) / 2

        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
            scale = np.array([max(shape[0], shape[1]) / 1.0] * 2)
        elif self.scale_type == 'short':
            scale = np.array([min(shape[0], shape[1]) / 1.0] * 2)
        elif self.scale_type == 'wh':
            scale = shape
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale
        dx = int(0)
        dy = int(0)
        if self.max_shift > 0:
            dx = np.random.randint(-self.max_shift * roi_size[0],
                                   self.max_shift * roi_size[0])
            # fix: use roi_size[1] for both dy bounds (was roi_size[0] for
            # the lower bound, giving an asymmetric vertical shift range)
            dy = np.random.randint(-self.max_shift * roi_size[1],
                                   self.max_shift * roi_size[1])

        center += np.array([dx, dy])
        input_size = 2 * center
        if self.trainsize != -1:
            dsize = self.trainsize
            imgshape = tuple(dsize)
        else:
            dsize = scale
            imgshape = tuple(shape.tolist())

        image_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
                                                   degree)[:2]
        image = cv2.warpAffine(
            image,
            image_affine_mat,
            imgshape,
            flags=cv2.INTER_LINEAR,
            borderValue=self.boldervalue)

        if self.hmsize is None:
            # single-scale: warp annotations into image coordinates
            kpts, mask, gt_bbox = self._affine_joints_mask(
                degree, center, roi_size, dsize, keypoints, heatmap_mask,
                gt_bbox)
            records['image'] = image
            if kpts is not None: records['gt_joints'] = kpts
            if mask is not None: records['mask'] = mask
            if gt_bbox is not None: records['gt_bbox'] = gt_bbox
            return records

        # multi-scale: one set of warped annotations per heatmap size
        kpts_lst = []
        mask_lst = []
        for hmsize in self.hmsize:
            kpts, mask, gt_bbox = self._affine_joints_mask(
                degree, center, roi_size, [hmsize, hmsize], keypoints,
                heatmap_mask, gt_bbox)
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image

        if 'gt_joints' in records:
            records['gt_joints'] = kpts_lst
        if 'mask' in records:
            records['mask'] = mask_lst
        if 'gt_bbox' in records:
            records['gt_bbox'] = gt_bbox
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class EvalAffine(object):
    """Affine resize used at eval time.

    The short side of [h, w] is rescaled to ``size`` via an affine warp
    computed by ``get_affine_mat_kernel``; the mask (if present) is warped
    the same way and ground-truth joints are dropped.

    Args:
        size (int): the standard length used to train; the 'short' side of
            [h, w] will be resized to it
        stride (int): kept for config compatibility
        records(dict): the dict containing the image, mask and coords

    Returns:
        records(dict): the image, mask and coords after the transform
    """

    def __init__(self, size, stride=64):
        super(EvalAffine, self).__init__()
        self.size = size
        self.stride = stride

    def __call__(self, records):
        image = records['image']
        h, w, _ = image.shape
        trans, size_resized = get_affine_mat_kernel(h, w, self.size, inv=False)
        records['image'] = cv2.warpAffine(image, trans, size_resized)
        mask = records['mask'] if 'mask' in records else None
        if mask is not None:
            records['mask'] = cv2.warpAffine(mask, trans, size_resized)
        # joints are not needed (nor valid) after the eval-time resize
        records.pop('gt_joints', None)
        records['scale_factor'] = self.size / min(h, w)
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class NormalizePermute(object):
    """Normalize an HWC image by per-channel mean/std and permute to CHW.

    Args:
        mean (list[3]): per-channel mean (0-255 scale)
        std (list[3]): per-channel std (0-255 scale)
        is_scale (bool): if True, divide the image by 255 first
    """

    def __init__(self,
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.120, 57.375],
                 is_scale=True):
        super(NormalizePermute, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, records):
        # astype always copies, so the caller's array is never mutated
        img = records['image'].astype(np.float32)
        if self.is_scale:
            img /= 255.
        img = img.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        inv_std = 1. / np.array(self.std, dtype=np.float32)
        # normalize channel by channel, in place on the CHW view
        for channel, m, s in zip(img, mean, inv_std):
            channel -= m
            channel *= s
        records['image'] = img
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class TagGenerate(object):
    """Record gt joint coordinates for the associative-embedding (AE) loss
    to sample values from the tag maps.

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        max_people (int): maximum people supported when sampling the AE loss

    Returns:
        records(dict): records with a 'tagmap' entry; 'gt_joints' removed
    """

    def __init__(self, num_joints, max_people=30):
        super(TagGenerate, self).__init__()
        self.max_people = max_people
        self.num_joints = num_joints

    def __call__(self, records):
        # only the first (largest) scale's joints feed the tag map
        kpts = records['gt_joints'][0]
        tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
        inds = np.where(kpts[..., 2] > 0)
        person_idx, joint_idx = inds[0], inds[1]
        vis_kpts = kpts[inds]

        # layout per entry: [joint id, y, x, valid flag]
        tagmap[person_idx, joint_idx, 0] = joint_idx
        tagmap[person_idx, joint_idx, 1] = vis_kpts[..., 1]
        tagmap[person_idx, joint_idx, 2] = vis_kpts[..., 0]
        tagmap[person_idx, joint_idx, 3] = 1
        records['tagmap'] = tagmap
        del records['gt_joints']
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmaps(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        sigma (float): the std of gaussin kernel genereted
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the heatmaps used to heatmaploss

    """

    def __init__(self, num_joints, hmsize, sigma=None):
        super(ToHeatmaps, self).__init__()
        self.num_joints = num_joints
        self.hmsize = np.array(hmsize)
        if sigma is None:
            sigma = hmsize[0] // 64
        self.sigma = sigma

        # Precompute one (6*sigma+3)-wide gaussian patch centered at
        # (3*sigma+1, 3*sigma+1); it is cropped and pasted per keypoint.
        r = 6 * sigma + 3
        x = np.arange(0, r, 1, np.float32)
        y = x[:, None]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, records):
        """Render per-scale gaussian heatmaps from 'gt_joints' and rename
        'mask' into per-scale 'mask_{i}x' entries."""
        kpts_lst = records['gt_joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
            kpts = kpts_lst[idx]
            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
            inds = np.where(kpts[..., 2] > 0)
            # integer pixel coords of all visible keypoints at this scale
            visible = kpts[inds].astype(np.int64)[..., :2]
            # source (gaussian patch) and destination (heatmap) windows,
            # clipped so keypoints near the border paste only the overlap
            ul = np.round(visible - 3 * self.sigma - 1)
            br = np.round(visible + 3 * self.sigma + 2)
            sul = np.maximum(0, -ul)
            sbr = np.minimum(hmsize, br) - ul
            dul = np.clip(ul, 0, hmsize - 1)
            dbr = np.clip(br, 0, hmsize)
            for i in range(len(visible)):
                # skip keypoints whose center falls outside the heatmap
                if visible[i][0] < 0 or visible[i][1] < 0 or visible[i][
                        0] >= hmsize or visible[i][1] >= hmsize:
                    continue
                dx1, dy1 = dul[i]
                dx2, dy2 = dbr[i]
                sx1, sy1 = sul[i]
                sx2, sy2 = sbr[i]
                # take the max so overlapping people keep the strongest peak
                heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
                    self.gaussian[sy1:sy2, sx1:sx2],
                    heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
            records['mask_{}x'.format(idx + 1)] = mask
        del records['mask']
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class RandomFlipHalfBodyTransform(object):
    """apply data augment to image and coords
    to achieve the flip, scale, rotate and half body transform effect for training image

    Args:
        trainsize (list):[w, h], Image target size
        upper_body_ids (list): The upper body joint ids
        flip_pairs (list): The left-right joints exchange order list
        pixel_std (int): The pixel std of the scale
        scale (float): The scale factor to transform the image
        rot (int): The rotate factor to transform the image
        num_joints_half_body (int): The joints threshold of the half body transform
        prob_half_body (float): The threshold of the half body transform
        flip (bool): Whether to flip the image
        rot_prob (float): The probability of applying the random rotation

    Returns:
        records(dict): contain the image and coords after tranformed

    """

    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6):
        super(RandomFlipHalfBodyTransform, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        # target w/h ratio used to pad the half-body crop box
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob

    def halfbody_transform(self, joints, joints_vis):
        """Compute a (center, scale) crop covering only the upper or lower
        body joints; returns (None, None) when too few joints qualify."""
        upper_joints = []
        lower_joints = []
        # split visible joints into upper/lower body groups
        for joint_id in range(joints.shape[0]):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        # NOTE(review): randn() < 0.5 is not a 50/50 coin (true ~69% of the
        # time); this matches the upstream HRNet code — confirm intended.
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints if len(
                lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        # pad the tight joint box out to the training aspect ratio
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        # enlarge the crop so the selected joints are not at the border
        scale = scale * 1.5

        return center, scale

    def flip_joints(self, joints, joints_vis, width, matched_parts):
        """Mirror joint x-coords and swap each left/right joint pair."""
        joints[:, 0] = width - joints[:, 0] - 1
        for pair in matched_parts:
            joints[pair[0], :], joints[pair[1], :] = \
                joints[pair[1], :], joints[pair[0], :].copy()
            joints_vis[pair[0], :], joints_vis[pair[1], :] = \
                joints_vis[pair[1], :], joints_vis[pair[0], :].copy()

        # zero out the coordinates of invisible joints
        return joints * joints_vis, joints_vis

    def __call__(self, records):
        image = records['image']
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']
        r = 0
        # optionally zoom the crop onto the upper/lower half of the body
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
                np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.halfbody_transform(joints,
                                                               joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body
        # random rescale always applied; rotation only with prob rot_prob
        sf = self.scale
        rf = self.rot
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2,
                    rf * 2) if np.random.random() <= self.rot_prob else 0

        if self.flip and np.random.random() <= 0.5:
            image = image[:, ::-1, :]
            joints, joints_vis = self.flip_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            # mirror the crop center as well
            c[0] = image.shape[1] - c[0] - 1
        records['image'] = image
        records['gt_joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
        records['rotate'] = r

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class AugmentationbyInformantionDropping(object):
    """AID: Augmentation by Informantion Dropping. Please refer
    to https://arxiv.org/abs/2008.07139

    Zeroes out circular patches around (jittered) visible keypoints so the
    model cannot rely on a single local appearance cue.

    Args:
        trainsize (list): [w, h] of the training input
        prob_cutout (float): The probability of the Cutout augmentation.
        offset_factor (float): Offset factor of cutout center.
        num_patch (int): Number of patches to be cutout.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self,
                 trainsize,
                 prob_cutout=0.0,
                 offset_factor=0.2,
                 num_patch=1):
        self.prob_cutout = prob_cutout
        self.offset_factor = offset_factor
        self.num_patch = num_patch
        self.trainsize = trainsize

    def _cutout(self, img, joints, joints_vis):
        """Zero out ``num_patch`` random circular regions of ``img``."""
        height, width, _ = img.shape
        flat = img.reshape((height * width, -1))
        # flattened pixel-grid coordinates matching `flat`'s row order
        grid_x, grid_y = np.meshgrid(np.arange(0, width), np.arange(0, height))
        grid_x = grid_x.reshape((-1, ))
        grid_y = grid_y.reshape((-1, ))
        for _ in range(self.num_patch):
            visible_ids, _ = np.where(joints_vis > 0)
            chosen_joint = np.random.choice(visible_ids)
            # jitter the patch center away from the chosen joint
            center = joints[chosen_joint, 0:2] + np.random.randn(
                2) * self.trainsize[0] * self.offset_factor
            radius = np.random.uniform(0.1, 0.2) * self.trainsize[0]
            dist = ((center[0] - grid_x) / radius)**2 + (
                (center[1] - grid_y) / radius)**2
            inside = np.where((dist <= 1) & (dist >= 0))[0]
            flat[inside, :] = 0
        return flat.reshape((height, width, -1))

    def __call__(self, records):
        img = records['image']
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        if np.random.rand() < self.prob_cutout:
            img = self._cutout(img, joints, joints_vis)
        records['image'] = img
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class TopDownAffine(object):
    """apply affine transform to image and coords

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        image = records['image']
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        rot = records.get('rotate', 0)
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))

        if self.use_udp:
            # UDP keeps the pixel grid unbiased (size - 1 convention)
            trans = get_warp_matrix(
                rot, records['center'] * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['scale'] * 200.0)
            image = cv2.warpAffine(
                image, trans, out_size, flags=cv2.INTER_LINEAR)
            joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(), trans)
        else:
            trans = get_affine_transform(records['center'],
                                         records['scale'] * 200, rot,
                                         self.trainsize)
            image = cv2.warpAffine(
                image, trans, out_size, flags=cv2.INTER_LINEAR)
            # only transform joints that are marked visible
            for joint_idx in range(joints.shape[0]):
                if joints_vis[joint_idx, 0] > 0.0:
                    joints[joint_idx, 0:2] = affine_transform(
                        joints[joint_idx, 0:2], trans)

        records['image'] = image
        records['gt_joints'] = joints

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class SinglePoseAffine(object):
    """apply affine transform to image and coords

    Args:
        trainsize (list): [w, h], the standard size used to train
        rotate (list[2]): [probability, max degree] of the random rotation
        scale (list[2]): [probability, ratio] of the random rescale
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed

    """

    def __init__(self,
                 trainsize,
                 rotate=[1.0, 30],
                 scale=[1.0, 0.25],
                 use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp
        self.rot_prob = rotate[0]
        self.rot_range = rotate[1]
        self.scale_prob = scale[0]
        self.scale_ratio = scale[1]

    def __call__(self, records):
        image = records['image']
        if 'joints_2d' in records:
            joints = records['joints_2d'] if 'joints_2d' in records else None
            # default to all-visible when no visibility info is provided
            joints_vis = records[
                'joints_vis'] if 'joints_vis' in records else np.ones(
                    (len(joints), 1))
        rot = 0
        s = 1.
        # sample the random rotation / scale factors for this record
        if np.random.random() < self.rot_prob:
            rot = np.clip(np.random.randn() * self.rot_range,
                          -self.rot_range * 2, self.rot_range * 2)
        if np.random.random() < self.scale_prob:
            s = np.clip(np.random.randn() * self.scale_ratio + 1,
                        1 - self.scale_ratio, 1 + self.scale_ratio)

        if self.use_udp:
            # UDP uses the (size - 1) convention to stay pixel-unbiased
            trans = get_warp_matrix(
                rot,
                np.array(records['bbox_center']) * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['bbox_scale'] * 200.0 * s)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
            if 'joints_2d' in records:
                joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(),
                                                    trans)
        else:
            trans = get_affine_transform(
                np.array(records['bbox_center']),
                records['bbox_scale'] * s * 200, rot, self.trainsize)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
            if 'joints_2d' in records:
                # only transform joints that are marked visible
                for i in range(len(joints)):
                    if joints_vis[i, 0] > 0.0:
                        joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        if 'joints_3d' in records:
            pose3d = records['joints_3d']
            if not rot == 0:
                # rotate the 3D pose about the z-axis so it stays
                # consistent with the in-plane 2D image rotation
                trans_3djoints = np.eye(3)
                rot_rad = -rot * np.pi / 180
                sn, cs = np.sin(rot_rad), np.cos(rot_rad)
                trans_3djoints[0, :2] = [cs, -sn]
                trans_3djoints[1, :2] = [sn, cs]
                pose3d[:, :3] = np.einsum('ij,kj->ki', trans_3djoints,
                                          pose3d[:, :3])
                records['joints_3d'] = pose3d

        records['image'] = image
        if 'joints_2d' in records:
            records['joints_2d'] = joints

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class NoiseJitter(object):
    """apply NoiseJitter to image

    Each channel is multiplied by an independent random factor drawn from
    [1 - noise_factor, 1 + noise_factor], then clamped to [0, 255].

    Args:
        noise_factor (float): the noise factor ratio used to generate the jitter

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, noise_factor=0.4):
        self.noise_factor = noise_factor

    def __call__(self, records):
        # per-channel multiplicative jitter factors
        self.pn = np.random.uniform(1 - self.noise_factor,
                                    1 + self.noise_factor, 3)
        img = records['image']
        for channel in range(3):
            jittered = img[:, :, channel] * self.pn[channel]
            img[:, :, channel] = np.clip(jittered, 0.0, 255.0)
        records['image'] = img
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class FlipPose(object):
    """random apply flip to image

    Args:
        flip_prob (float): probability of mirroring the image
        img_res (int): input resolution, used to mirror the 2D x-coords
        num_joints (int): 14 or 24, selects the left/right joint permutation

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, flip_prob=0.5, img_res=224, num_joints=14):
        self.flip_pob = flip_prob
        self.img_res = img_res
        if num_joints == 24:
            self.perm = [
                5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17,
                18, 19, 21, 20, 23, 22
            ]
        elif num_joints == 14:
            self.perm = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
        else:
            print("error num_joints in flip :{}".format(num_joints))

    def __call__(self, records):

        if np.random.random() >= self.flip_pob:
            return records

        records['image'] = np.fliplr(records['image'])

        if 'joints_2d' in records:
            # swap left/right joints, then mirror x in image coords
            flipped_2d = records['joints_2d'][self.perm]
            flipped_2d[:, 0] = self.img_res - flipped_2d[:, 0]
            records['joints_2d'] = flipped_2d

        if 'joints_3d' in records:
            # swap left/right joints, then negate x in metric coords
            flipped_3d = records['joints_3d'][self.perm]
            flipped_3d[:, 0] = -flipped_3d[:, 0]
            records['joints_3d'] = flipped_3d

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class TopDownEvalAffine(object):
    """apply affine transform to image and coords

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        rot = 0
        # im_shape is stored as [h, w]; reverse it to [w, h]
        imshape = records['im_shape'][::-1]
        center = imshape / 2.
        scale = imshape
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))

        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
        else:
            trans = get_affine_transform(center, scale, rot, self.trainsize)
        records['image'] = cv2.warpAffine(
            records['image'], trans, out_size, flags=cv2.INTER_LINEAR)

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmapsTopDown(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of gaussin kernel genereted
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the heatmaps used to heatmaploss

    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        """refer to
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.
        """
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        # image size as [w, h]; maps joint coords into heatmap coords
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        tmp_size = self.sigma * 3
        feat_stride = image_size / self.hmsize
        for joint_id in range(num_joints):
            # joint center in heatmap pixel coords (rounded)
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)

            # gaussian patch bounds (upper-left / bottom-right, exclusive)
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            # patch entirely outside the heatmap: mark the joint unusable
            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
                    0] < 0 or br[1] < 0:

                target_weight[joint_id] = 0
                continue

            # build a (2*tmp_size+1)^2 gaussian centered in its own window
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2

            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))

            # overlap ranges: g_x/g_y index the patch, img_x/img_y the map
            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]

            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])

            v = target_weight[joint_id]
            if v > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
                    0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
        del records['gt_joints'], records['joints_vis']

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmapsTopDown_DARK(object):
    """Generate gaussian heatmaps of keypoints for the heatmap loss
    (DARK variant: the gaussian keeps the sub-pixel joint center instead
    of quantizing it to the grid, which avoids the rounding bias).

    Args:
        hmsize (list): [w, h] output heatmap's size.
        sigma (float): the std of the gaussian kernel generated.

    Returns:
        records (dict): contains the heatmaps used by the heatmap loss.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_DARK, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        tmp_size = self.sigma * 3
        feat_stride = image_size / self.hmsize
        # The full-heatmap coordinate grids are the same for every joint;
        # build them once instead of inside the per-joint loop (hoisted
        # loop-invariant work).
        x = np.arange(0, self.hmsize[0], 1, np.float32)
        y = np.arange(0, self.hmsize[1], 1, np.float32)
        y = y[:, np.newaxis]
        for joint_id in range(num_joints):
            # Sub-pixel joint center on the heatmap grid (not rounded).
            mu_x = joints[joint_id][0] / feat_stride[0]
            mu_y = joints[joint_id][1] / feat_stride[1]

            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
                    0] < 0 or br[1] < 0:
                # Gaussian window falls completely outside the heatmap.
                target_weight[joint_id] = 0
                continue

            v = target_weight[joint_id]
            if v > 0.5:
                target[joint_id] = np.exp(-(
                    (x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
        records['target'] = target
        records['target_weight'] = target_weight
        del records['gt_joints'], records['joints_vis']

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmapsTopDown_UDP(object):
    """This code is based on:
    https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py

    Generate the gaussian heatmaps of keypoints for the heatmap loss.
    ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).

    Args:
        hmsize (list): [w, h] output heatmap's size.
        sigma (float): the std of the gaussian kernel generated.

    Returns:
        records (dict): contains the heatmaps used by the heatmap loss.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_UDP, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        kpts = records['gt_joints']
        kpts_vis = records['joints_vis']
        njoints = kpts.shape[0]
        img_wh = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        weights = np.ones((njoints, 1), dtype=np.float32)
        weights[:, 0] = kpts_vis[:, 0]
        heatmaps = np.zeros(
            (njoints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        radius = self.sigma * 3
        # Kernel-window coordinate grids, shared by every joint.
        win = 2 * radius + 1
        xs = np.arange(0, win, 1, np.float32)
        ys = xs[:, None]
        # UDP stride: map [0, W-1] pixels onto [0, w-1] heatmap cells.
        stride = (img_wh - 1.0) / (self.hmsize - 1.0)
        for idx in range(njoints):
            # Rounded joint center on the heatmap grid.
            cx = int(kpts[idx][0] / stride[0] + 0.5)
            cy = int(kpts[idx][1] / stride[1] + 0.5)

            left_top = [int(cx - radius), int(cy - radius)]
            right_bottom = [int(cx + radius + 1), int(cy + radius + 1)]
            outside = (left_top[0] >= self.hmsize[0] or
                       left_top[1] >= self.hmsize[1] or
                       right_bottom[0] < 0 or right_bottom[1] < 0)
            if outside:
                weights[idx] = 0
                continue

            # Shift the kernel center by the quantization residual so the
            # gaussian peaks at the exact (sub-pixel) joint location.
            cx_exact = kpts[idx][0] / stride[0]
            cy_exact = kpts[idx][1] / stride[1]
            gx0 = win // 2 + (cx_exact - cx)
            gy0 = win // 2 + (cy_exact - cy)
            patch = np.exp(-((xs - gx0)**2 + (ys - gy0)**2) /
                           (2 * self.sigma**2))

            # Clip the kernel window against the heatmap borders.
            src_x = max(0, -left_top[0]), min(right_bottom[0],
                                              self.hmsize[0]) - left_top[0]
            src_y = max(0, -left_top[1]), min(right_bottom[1],
                                              self.hmsize[1]) - left_top[1]
            dst_x = max(0, left_top[0]), min(right_bottom[0], self.hmsize[0])
            dst_y = max(0, left_top[1]), min(right_bottom[1], self.hmsize[1])

            if weights[idx] > 0.5:
                heatmaps[idx][dst_y[0]:dst_y[1], dst_x[0]:dst_x[1]] = \
                    patch[src_y[0]:src_y[1], src_x[0]:src_x[1]]
        records['target'] = heatmaps
        records['target_weight'] = weights
        del records['gt_joints'], records['joints_vis']

        return records
|
|
|
|
|
|
|
|
from typing import Optional, Tuple, Union, List |
|
|
import numbers |
|
|
|
|
|
|
|
|
def _scale_size(
        size: Tuple[int, int],
        scale: Union[float, int, tuple], ) -> Tuple[int, int]:
    """Rescale a (w, h) size by a ratio.

    Args:
        size (tuple[int]): (w, h).
        scale (float | tuple(float)): Scaling factor; a scalar is applied
            to both dimensions.

    Returns:
        tuple[int]: scaled size, rounded to the nearest integer.
    """
    if isinstance(scale, (float, int)):
        scale = (scale, scale)
    width, height = size
    scaled_w = int(width * float(scale[0]) + 0.5)
    scaled_h = int(height * float(scale[1]) + 0.5)
    return scaled_w, scaled_h
|
|
|
|
|
|
|
|
def rescale_size(old_size: tuple,
                 scale: Union[float, int, tuple],
                 return_scale: bool=False) -> tuple:
    """Calculate the new size to be rescaled to.

    Args:
        old_size (tuple[int]): The old size (w, h) of image.
        scale (float | tuple[int] | list[int]): The scaling factor or maximum
            size. If it is a number, then the image will be rescaled by this
            factor, else if it is a tuple/list of 2 integers, then the image
            will be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image size.

    Returns:
        tuple[int]: The new rescaled image size (followed by the scale
        factor when ``return_scale`` is True).

    Raises:
        ValueError: If a numeric ``scale`` is not positive.
        TypeError: If ``scale`` is neither a number nor a tuple/list.
    """
    w, h = old_size
    if isinstance(scale, (float, int)):
        if scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')
        scale_factor = scale
    elif isinstance(scale, (list, tuple)):
        # fix: callers such as PETR_Resize.random_sample()/random_sample_ratio()
        # produce tuple scales, but previously only list was accepted, so
        # tuples raised TypeError despite the documented contract.
        max_long_edge = max(scale)
        max_short_edge = min(scale)
        # Largest factor that keeps both edges within the (long, short) bound.
        scale_factor = min(max_long_edge / max(h, w),
                           max_short_edge / min(h, w))
    else:
        raise TypeError(
            f'Scale must be a number or tuple of int, but got {type(scale)}')

    new_size = _scale_size((w, h), scale_factor)

    if return_scale:
        return new_size, scale_factor
    else:
        return new_size
|
|
|
|
|
|
|
|
def imrescale(img: np.ndarray,
              scale: Union[float, Tuple[int, int]],
              return_scale: bool=False,
              interpolation: str='bilinear',
              backend: Optional[str]=None) -> Union[np.ndarray, Tuple[
                  np.ndarray, float]]:
    """Resize an image while keeping its aspect ratio.

    Args:
        img (ndarray): The input image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            If it is a float number, then the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, then the image will
            be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image.
        interpolation (str): Same as :func:`resize`.
        backend (str | None): Same as :func:`resize`.

    Returns:
        ndarray: The rescaled image (followed by the scale factor when
        ``return_scale`` is True).
    """
    height, width = img.shape[:2]
    # Compute the aspect-preserving target size first, then resize once.
    target_size, ratio = rescale_size((width, height), scale, return_scale=True)
    out_img = imresize(
        img, target_size, interpolation=interpolation, backend=backend)
    return (out_img, ratio) if return_scale else out_img
|
|
|
|
|
|
|
|
def imresize(
        img: np.ndarray,
        size: Tuple[int, int],
        return_scale: bool=False,
        interpolation: str='bilinear',
        out: Optional[np.ndarray]=None,
        backend: Optional[str]=None,
        interp=cv2.INTER_LINEAR, ) -> Union[Tuple[np.ndarray, float, float],
                                            np.ndarray]:
    """Resize image to a given size.

    Args:
        img (ndarray): The input image.
        size (tuple[int]): Target size (w, h).
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        out (ndarray): The output destination.
        backend (str | None): The image resize backend type. Options are `cv2`,
            `pillow`, `None`. If backend is None, the global imread_backend
            specified by ``mmcv.use_backend()`` will be used. Default: None.
        interp: cv2 interpolation flag actually passed to ``cv2.resize``
            (used only by the 'cv2' backend).

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    h, w = img.shape[:2]
    if backend is None:
        # NOTE(review): `imread_backend` is not defined anywhere in this
        # module's visible scope — presumably a module-level global set
        # elsewhere; confirm, otherwise backend=None raises NameError.
        backend = imread_backend
    if backend not in ['cv2', 'pillow']:
        # fix: the two f-string fragments were concatenated without a space
        # ("...resize.Supported...").
        raise ValueError(f'backend: {backend} is not supported for resize. '
                         f"Supported backends are 'cv2', 'pillow'")

    if backend == 'pillow':
        assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
        # NOTE(review): `Image` and `pillow_interp_codes` are also not
        # defined in this module's visible scope — verify they are imported
        # at file level before using the 'pillow' backend.
        pil_image = Image.fromarray(img)
        pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
        resized_img = np.array(pil_image)
    else:
        resized_img = cv2.resize(img, size, dst=out, interpolation=interp)
    if not return_scale:
        return resized_img
    else:
        w_scale = size[0] / w
        h_scale = size[1] / h
        return resized_img, w_scale, h_scale
|
|
|
|
|
|
|
|
class PETR_Resize:
    """Resize images & bbox & mask.

    This transform resizes the input image to some scale. Bboxes and masks are
    then resized with the same scale factor. If the input dict contains the key
    "scale", then the scale in the input dict is used, otherwise the specified
    scale in the init method is used. If the input dict contains the key
    "scale_factor" (if MultiScaleFlipAug does not give img_scale but
    scale_factor), the actual scale will be computed by image shape and
    scale_factor.

    `img_scale` can either be a tuple (single-scale) or a list of tuple
    (multi-scale). There are 3 multiscale modes:

    - ``ratio_range is not None``: randomly sample a ratio from the ratio \
      range and multiply it with the image scale.
    - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
      sample a scale from the multiscale range.
    - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
      sample a scale from multiple scales.

    Args:
        img_scale (tuple or list[tuple]): Images scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio)
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image.
        bbox_clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. In some dataset like MOT17, the gt bboxes
            are allowed to cross the border of images. Therefore, we don't
            need to clip the gt bboxes in these cases. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generates slightly different results. Defaults
            to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        override (bool, optional): Whether to override `scale` and
            `scale_factor` so as to call resize twice. Default False. If True,
            after the first resizing, the existed `scale` and `scale_factor`
            will be ignored so the second resizing can be allowed.
            This option is a work-around for multiple times of resize in DETR.
            Defaults to False.
        keypoint_clip_border (bool, optional): Whether to clip keypoints
            outside the border of the image. Defaults to True.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=True,
                 bbox_clip_border=True,
                 backend='cv2',
                 interpolation='bilinear',
                 override=False,
                 keypoint_clip_border=True):
        if img_scale is None:
            self.img_scale = None
        else:
            # Normalize a single scale into a one-element list.
            if isinstance(img_scale, list):
                self.img_scale = img_scale
            else:
                self.img_scale = [img_scale]
            assert isinstance(self.img_scale, list)

        if ratio_range is not None:
            # mode 1: given a base scale and a range of image ratios
            assert len(self.img_scale) == 1
        else:
            # mode 2: given multiple scales or a range of scales
            assert multiscale_mode in ['value', 'range']

        self.backend = backend
        self.multiscale_mode = multiscale_mode
        self.ratio_range = ratio_range
        self.keep_ratio = keep_ratio
        self.interpolation = interpolation
        self.override = override
        self.bbox_clip_border = bbox_clip_border
        self.keypoint_clip_border = keypoint_clip_border

    @staticmethod
    def random_select(img_scales):
        """Randomly select an img_scale from given candidates.

        Args:
            img_scales (list[tuple]): Images scales for selection.

        Returns:
            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, \
                where ``img_scale`` is the selected image scale and \
                ``scale_idx`` is the selected index in the given candidates.
        """

        assert isinstance(img_scales, list)
        scale_idx = np.random.randint(len(img_scales))
        img_scale = img_scales[scale_idx]
        return img_scale, scale_idx

    @staticmethod
    def random_sample(img_scales):
        """Randomly sample an img_scale when ``multiscale_mode=='range'``.

        Args:
            img_scales (list[tuple]): Images scale range for sampling.
                There must be two tuples in img_scales, which specify the lower
                and upper bound of image scales.

        Returns:
            (tuple, None): Returns a tuple ``(img_scale, None)``, where \
                ``img_scale`` is sampled scale and None is just a placeholder \
                to be consistent with :func:`random_select`.
        """

        assert isinstance(img_scales, list) and len(img_scales) == 2
        img_scale_long = [max(s) for s in img_scales]
        img_scale_short = [min(s) for s in img_scales]
        long_edge = np.random.randint(
            min(img_scale_long), max(img_scale_long) + 1)
        short_edge = np.random.randint(
            min(img_scale_short), max(img_scale_short) + 1)
        img_scale = (long_edge, short_edge)
        return img_scale, None

    @staticmethod
    def random_sample_ratio(img_scale, ratio_range):
        """Randomly sample an img_scale when ``ratio_range`` is specified.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
        generate sampled scale.

        Args:
            img_scale (list | tuple): Images scale base to multiply with ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to scale
                the ``img_scale``.

        Returns:
            (tuple, None): Returns a tuple ``(scale, None)``, where \
                ``scale`` is sampled ratio multiplied with ``img_scale`` and \
                None is just a placeholder to be consistent with \
                :func:`random_select`.
        """

        # fix: the class docstring allows img_scale to be a tuple, but this
        # assert previously rejected tuples (list only).
        assert isinstance(img_scale, (list, tuple)) and len(img_scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
        return scale, None

    def _random_scale(self, results):
        """Randomly sample an img_scale according to ``ratio_range`` and
        ``multiscale_mode``.

        If ``ratio_range`` is specified, a ratio will be sampled and be
        multiplied with ``img_scale``.
        If multiple scales are specified by ``img_scale``, a scale will be
        sampled according to ``multiscale_mode``.
        Otherwise, single scale will be used.

        Args:
            results (dict): Result dict from :obj:`dataset`.

        Returns:
            dict: Two new keys 'scale` and 'scale_idx` are added into \
                ``results``, which would be used by subsequent pipelines.
        """

        if self.ratio_range is not None:
            scale, scale_idx = self.random_sample_ratio(self.img_scale[0],
                                                        self.ratio_range)
        elif len(self.img_scale) == 1:
            scale, scale_idx = self.img_scale[0], 0
        elif self.multiscale_mode == 'range':
            scale, scale_idx = self.random_sample(self.img_scale)
        elif self.multiscale_mode == 'value':
            scale, scale_idx = self.random_select(self.img_scale)
        else:
            raise NotImplementedError
        results['scale'] = scale
        results['scale_idx'] = scale_idx

    def _resize_img(self, results):
        """Resize images with ``results['scale']``."""
        for key in ['image'] if 'image' in results else []:
            if self.keep_ratio:
                img, scale_factor = imrescale(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    interpolation=self.interpolation,
                    backend=self.backend)

                # imrescale returns one uniform factor; recover the exact
                # per-axis factors from the rounded output size.
                new_h, new_w = img.shape[:2]
                h, w = results[key].shape[:2]
                w_scale = new_w / w
                h_scale = new_h / h
            else:
                img, w_scale, h_scale = imresize(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    interpolation=self.interpolation,
                    backend=self.backend)

            scale_factor = np.array(
                [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
            results['im_shape'] = np.array(img.shape)
            # in case that there is no padding
            results['pad_shape'] = img.shape
            results['scale_factor'] = scale_factor
            results['keep_ratio'] = self.keep_ratio

            results[key] = img

    def _resize_bboxes(self, results):
        """Resize bounding boxes with ``results['scale_factor']``."""
        for key in ['gt_bbox'] if 'gt_bbox' in results else []:
            bboxes = results[key] * results['scale_factor']
            if self.bbox_clip_border:
                img_shape = results['im_shape']
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            results[key] = bboxes

    def _resize_masks(self, results):
        """Resize masks with ``results['scale']``"""
        for key in ['mask'] if 'mask' in results else []:
            if results[key] is None:
                continue
            if self.keep_ratio:
                results[key] = results[key].rescale(results['scale'])
            else:
                results[key] = results[key].resize(results['im_shape'][:2])

    def _resize_seg(self, results):
        """Resize semantic segmentation map with ``results['scale']``."""
        for key in ['seg'] if 'seg' in results else []:
            # nearest interpolation keeps label values intact
            if self.keep_ratio:
                gt_seg = imrescale(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = imresize(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results[key] = gt_seg

    def _resize_keypoints(self, results):
        """Resize keypoints with ``results['scale_factor']``."""
        for key in ['gt_joints'] if 'gt_joints' in results else []:
            keypoints = results[key].copy()
            keypoints[..., 0] = keypoints[..., 0] * results['scale_factor'][0]
            keypoints[..., 1] = keypoints[..., 1] * results['scale_factor'][1]
            if self.keypoint_clip_border:
                img_shape = results['im_shape']
                keypoints[..., 0] = np.clip(keypoints[..., 0], 0, img_shape[1])
                keypoints[..., 1] = np.clip(keypoints[..., 1], 0, img_shape[0])
            results[key] = keypoints

    def _resize_areas(self, results):
        """Resize mask areas with ``results['scale_factor']``."""
        for key in ['gt_areas'] if 'gt_areas' in results else []:
            areas = results[key].copy()
            # area scales with the product of both axis factors
            areas = areas * results['scale_factor'][0] * results[
                'scale_factor'][1]
            results[key] = areas

    def __call__(self, results):
        """Call function to resize images, bounding boxes, masks, semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
                'keep_ratio' keys are added into result dict.
        """
        if 'scale' not in results:
            if 'scale_factor' in results:
                img_shape = results['image'].shape[:2]
                scale_factor = results['scale_factor'][0]
                # derive the absolute scale (w, h) from shape * factor
                results['scale'] = [int(x * scale_factor)
                                    for x in img_shape][::-1]
            else:
                self._random_scale(results)
        else:
            if not self.override:
                assert 'scale_factor' not in results, (
                    'scale and scale_factor cannot be both set.')
            else:
                results.pop('scale')
                if 'scale_factor' in results:
                    results.pop('scale_factor')
                self._random_scale(results)

        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        self._resize_keypoints(results)
        self._resize_areas(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(img_scale={self.img_scale}, '
        repr_str += f'multiscale_mode={self.multiscale_mode}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        # fix: previously this line closed the paren and the next line
        # appended another field plus a second ')', yielding
        # '...bbox_clip_border=True)keypoint_clip_border=True)'.
        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
        repr_str += f'keypoint_clip_border={self.keypoint_clip_border})'
        return repr_str
|
|
|