|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import absolute_import |
|
|
|
|
|
try: |
|
|
from collections.abc import Sequence |
|
|
except Exception: |
|
|
from collections import Sequence |
|
|
|
|
|
import cv2 |
|
|
import numpy as np |
|
|
import math |
|
|
import copy |
|
|
|
|
|
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix |
|
|
from ppdet.core.workspace import serializable |
|
|
from ppdet.utils.logger import setup_logger |
|
|
logger = setup_logger(__name__) |
|
|
|
|
|
registered_ops = [] |
|
|
|
|
|
__all__ = [ |
|
|
'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps', |
|
|
'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform', |
|
|
'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK', |
|
|
'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine', |
|
|
'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter', |
|
|
'FlipPose', 'PETR_Resize' |
|
|
] |
|
|
|
|
|
|
|
|
def register_keypointop(cls):
    """Register a keypoint data-transform op for config-driven creation.

    Wraps ``cls`` with ppdet's ``serializable`` so the op can be
    instantiated from YAML configs; returns the wrapped class.
    """
    serializable_cls = serializable(cls)
    return serializable_cls
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class KeyPointFlip(object):
    """Randomly mirror the image horizontally and update its annotations.

    When a flip happens, left/right keypoints must trade places (the right
    knee becomes the left knee in the mirrored image), which is what
    ``flip_permutation`` encodes, and every x-coordinate is mirrored.

    Args:
        flip_permutation (list[17]): left-right exchange order corresponding
            to joints [0, 1, 2, ..., 16]
        hmsize (list[2]): heatmap widths of the multi-scale outputs of
            higherhrnet; when None, the image width is used instead
        flip_prob (float): probability of flipping the image
        records(dict): the dict containing the image, mask and coords

    Returns:
        records(dict): the image, mask and coords after the transform
    """

    def __init__(self, flip_permutation, hmsize=None, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize

    def _flipjoints(self, records, sizelst):
        """Swap left/right joints and mirror x-coords at every scale.

        ``records['gt_joints']`` is a Sequence (one array per output scale)
        in higherhrnet, otherwise a single array.
        """
        if 'gt_joints' not in records or len(records['gt_joints']) == 0:
            return records

        joints = records['gt_joints']
        if isinstance(joints, Sequence):
            for scale_idx, width in enumerate(sizelst):
                scale_joints = joints[scale_idx]
                # ndim==3 means (people, joints, 3); permute the joint axis
                if scale_joints.ndim == 3:
                    scale_joints = scale_joints[:, self.flip_permutation]
                else:
                    scale_joints = scale_joints[self.flip_permutation]
                scale_joints[..., 0] = width - scale_joints[..., 0]
                joints[scale_idx] = scale_joints
        else:
            width = sizelst[0]
            if joints.ndim == 3:
                joints = joints[:, self.flip_permutation]
            else:
                joints = joints[self.flip_permutation]
            joints[..., 0] = width - joints[..., 0]

        records['gt_joints'] = joints
        return records

    def _flipmask(self, records, sizelst):
        """Mirror each per-scale mask along its x axis."""
        if 'mask' not in records:
            return records

        masks = records['mask']
        for scale_idx in range(min(len(sizelst), len(masks))):
            masks[scale_idx] = masks[scale_idx][:, ::-1]
        records['mask'] = masks
        return records

    def _flipbbox(self, records, sizelst):
        """Mirror bbox x-coords (x1/x2 are swapped so x1 <= x2 holds)."""
        if 'gt_bbox' not in records:
            return records

        width = sizelst[0]
        boxes = records['gt_bbox']
        boxes[:, 0::2] = width - boxes[:, 0::2][:, ::-1]
        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, width)
        records['gt_bbox'] = boxes
        return records

    def __call__(self, records):
        if np.random.random() >= self.flip_prob:
            return records

        image = records['image'][:, ::-1]
        records['image'] = image
        # without explicit heatmap sizes, mirror in image coordinates
        sizelst = [image.shape[1]] if self.hmsize is None else self.hmsize
        self._flipjoints(records, sizelst)
        self._flipmask(records, sizelst)
        self._flipbbox(records, sizelst)

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class RandomAffine(object):
    """apply affine transform to image, mask and coords
    to achieve the rotate, scale and shift effect for training image

    Args:
        max_degree (float): the max abslute rotate degree to apply, transform range is [-max_degree, max_degree]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        max_shift (float): the max abslute shift ratio to apply, transform range is [-max_shift*imagesize, max_shift*imagesize]
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        trainsize (list[2]): the standard length used to train, the 'scale_type' of [h,w] will be resize to trainsize for standard
        scale_type (str): the length of [h,w] to used for trainsize, chosed between 'short' and 'long'
        boldervalue (list[3]): the border (padding) color used by warpAffine
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the image, mask and coords after tranformed

    """

    def __init__(self,
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=None,
                 trainsize=[512, 512],
                 scale_type='short',
                 boldervalue=[114, 114, 114]):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
        self.max_scale = scale[1]
        self.max_shift = max_shift
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type
        self.boldervalue = boldervalue

    def _get_affine_matrix_old(self, center, scale, res, rot=0):
        """Generate transformation matrix (legacy variant using a single
        scalar scale for both axes; kept for back-compat/reference)."""
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[1]) / h
        t[1, 1] = float(res[0]) / h
        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1

            # rotate around the output center: translate, rotate, translate back
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[1] / 2
            t_mat[1, 2] = -res[0] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def _get_affine_matrix(self, center, scale, res, rot=0):
        """Generate a 3x3 transformation matrix mapping an roi of size
        ``scale`` ([w, h]) centered at ``center`` to an output of size
        ``res`` ([w, h]), optionally rotated by ``rot`` degrees."""
        w, h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[0]) / w
        t[1, 1] = float(res[1]) / h
        t[0, 2] = res[0] * (-float(center[0]) / w + .5)
        t[1, 2] = res[1] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1

            # rotate around the output center: translate, rotate, translate back
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[0] / 2
            t_mat[1, 2] = -res[1] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def _affine_joints_mask(self,
                            degree,
                            center,
                            roi_size,
                            dsize,
                            keypoints=None,
                            heatmap_mask=None,
                            gt_bbox=None):
        """Apply the same affine warp to keypoints, mask and boxes.

        Returns a (kpts, mask, bbox) triple; entries are None when the
        corresponding input was None.
        """
        kpts = None
        mask = None
        bbox = None
        mask_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
                                                  degree)[:2]
        if heatmap_mask is not None:
            mask = cv2.warpAffine(heatmap_mask, mask_affine_mat, dsize)
            # re-binarize: interpolation produces intermediate values
            mask = ((mask / 255) > 0.5).astype(np.float32)
        if keypoints is not None:
            kpts = copy.deepcopy(keypoints)
            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
                                                mask_affine_mat)
            # zero out keypoints warped outside the output canvas
            kpts[(kpts[..., 0]) > dsize[0], :] = 0
            kpts[(kpts[..., 1]) > dsize[1], :] = 0
            kpts[(kpts[..., 0]) < 0, :] = 0
            kpts[(kpts[..., 1]) < 0, :] = 0
        if gt_bbox is not None:
            # warp all four corners, then take the axis-aligned hull
            temp_bbox = gt_bbox[:, [0, 3, 2, 1]]
            cat_bbox = np.concatenate((gt_bbox, temp_bbox), axis=-1)
            gt_bbox_warped = warp_affine_joints(cat_bbox, mask_affine_mat)
            bbox = np.zeros_like(gt_bbox)
            bbox[:, 0] = gt_bbox_warped[:, 0::2].min(1).clip(0, dsize[0])
            bbox[:, 2] = gt_bbox_warped[:, 0::2].max(1).clip(0, dsize[0])
            bbox[:, 1] = gt_bbox_warped[:, 1::2].min(1).clip(0, dsize[1])
            bbox[:, 3] = gt_bbox_warped[:, 1::2].max(1).clip(0, dsize[1])
        return kpts, mask, bbox

    def __call__(self, records):
        image = records['image']
        shape = np.array(image.shape[:2][::-1])  # [w, h]
        keypoints = None
        heatmap_mask = None
        gt_bbox = None
        if 'gt_joints' in records:
            keypoints = records['gt_joints']

        if 'mask' in records:
            heatmap_mask = records['mask']
            # scale to 0/255 so warpAffine interpolation is meaningful
            heatmap_mask *= 255

        if 'gt_bbox' in records:
            gt_bbox = records['gt_bbox']

        degree = (np.random.random() * 2 - 1) * self.max_degree
        center = np.array(shape) / 2

        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
            scale = np.array([max(shape[0], shape[1]) / 1.0] * 2)
        elif self.scale_type == 'short':
            scale = np.array([min(shape[0], shape[1]) / 1.0] * 2)
        elif self.scale_type == 'wh':
            scale = shape
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale
        dx = int(0)
        dy = int(0)
        if self.max_shift > 0:
            dx = np.random.randint(-self.max_shift * roi_size[0],
                                   self.max_shift * roi_size[0])
            # fix: use roi_size[1] for both dy bounds (was roi_size[0] for
            # the lower bound, giving an asymmetric vertical shift range)
            dy = np.random.randint(-self.max_shift * roi_size[1],
                                   self.max_shift * roi_size[1])

        center += np.array([dx, dy])
        input_size = 2 * center
        if self.trainsize != -1:
            dsize = self.trainsize
            imgshape = tuple(dsize)
        else:
            dsize = scale
            imgshape = tuple(shape.tolist())

        image_affine_mat = self._get_affine_matrix(center, roi_size, dsize,
                                                   degree)[:2]
        image = cv2.warpAffine(
            image,
            image_affine_mat,
            imgshape,
            flags=cv2.INTER_LINEAR,
            borderValue=self.boldervalue)

        if self.hmsize is None:
            # single-scale: warp annotations into image coordinates
            kpts, mask, gt_bbox = self._affine_joints_mask(
                degree, center, roi_size, dsize, keypoints, heatmap_mask,
                gt_bbox)
            records['image'] = image
            if kpts is not None: records['gt_joints'] = kpts
            if mask is not None: records['mask'] = mask
            if gt_bbox is not None: records['gt_bbox'] = gt_bbox
            return records

        # multi-scale: one set of warped annotations per heatmap size
        kpts_lst = []
        mask_lst = []
        for hmsize in self.hmsize:
            kpts, mask, gt_bbox = self._affine_joints_mask(
                degree, center, roi_size, [hmsize, hmsize], keypoints,
                heatmap_mask, gt_bbox)
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image

        if 'gt_joints' in records:
            records['gt_joints'] = kpts_lst
        if 'mask' in records:
            records['mask'] = mask_lst
        if 'gt_bbox' in records:
            records['gt_bbox'] = gt_bbox
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class EvalAffine(object):
    """Affine resize used at eval time.

    The short side of [h, w] is rescaled to ``size`` via an affine warp
    computed by ``get_affine_mat_kernel``; the mask (if present) is warped
    the same way and ground-truth joints are dropped.

    Args:
        size (int): the standard length used to train; the 'short' side of
            [h, w] will be resized to it
        stride (int): kept for config compatibility
        records(dict): the dict containing the image, mask and coords

    Returns:
        records(dict): the image, mask and coords after the transform
    """

    def __init__(self, size, stride=64):
        super(EvalAffine, self).__init__()
        self.size = size
        self.stride = stride

    def __call__(self, records):
        image = records['image']
        h, w, _ = image.shape
        trans, size_resized = get_affine_mat_kernel(h, w, self.size, inv=False)
        records['image'] = cv2.warpAffine(image, trans, size_resized)
        mask = records['mask'] if 'mask' in records else None
        if mask is not None:
            records['mask'] = cv2.warpAffine(mask, trans, size_resized)
        # joints are not needed (nor valid) after the eval-time resize
        records.pop('gt_joints', None)
        records['scale_factor'] = self.size / min(h, w)
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class NormalizePermute(object):
    """Normalize an HWC image by per-channel mean/std and permute to CHW.

    Args:
        mean (list[3]): per-channel mean (0-255 scale)
        std (list[3]): per-channel std (0-255 scale)
        is_scale (bool): if True, divide the image by 255 first
    """

    def __init__(self,
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.120, 57.375],
                 is_scale=True):
        super(NormalizePermute, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, records):
        # astype always copies, so the caller's array is never mutated
        img = records['image'].astype(np.float32)
        if self.is_scale:
            img /= 255.
        img = img.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        inv_std = 1. / np.array(self.std, dtype=np.float32)
        # normalize channel by channel, in place on the CHW view
        for channel, m, s in zip(img, mean, inv_std):
            channel -= m
            channel *= s
        records['image'] = img
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class TagGenerate(object):
    """Record gt joint coordinates for the associative-embedding (AE) loss
    to sample values from the tag maps.

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        max_people (int): maximum people supported when sampling the AE loss

    Returns:
        records(dict): records with a 'tagmap' entry; 'gt_joints' removed
    """

    def __init__(self, num_joints, max_people=30):
        super(TagGenerate, self).__init__()
        self.max_people = max_people
        self.num_joints = num_joints

    def __call__(self, records):
        # only the first (largest) scale's joints feed the tag map
        kpts = records['gt_joints'][0]
        tagmap = np.zeros((self.max_people, self.num_joints, 4), dtype=np.int64)
        inds = np.where(kpts[..., 2] > 0)
        person_idx, joint_idx = inds[0], inds[1]
        vis_kpts = kpts[inds]

        # layout per entry: [joint id, y, x, valid flag]
        tagmap[person_idx, joint_idx, 0] = joint_idx
        tagmap[person_idx, joint_idx, 1] = vis_kpts[..., 1]
        tagmap[person_idx, joint_idx, 2] = vis_kpts[..., 0]
        tagmap[person_idx, joint_idx, 3] = 1
        records['tagmap'] = tagmap
        del records['gt_joints']
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmaps(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        sigma (float): the std of gaussin kernel genereted
        records(dict): the dict contained the image, mask and coords

    Returns:
        records(dict): contain the heatmaps used to heatmaploss

    """

    def __init__(self, num_joints, hmsize, sigma=None):
        super(ToHeatmaps, self).__init__()
        self.num_joints = num_joints
        self.hmsize = np.array(hmsize)
        if sigma is None:
            sigma = hmsize[0] // 64
        self.sigma = sigma

        # Precompute one (6*sigma+3)-wide gaussian patch centered at
        # (3*sigma+1, 3*sigma+1); it is cropped and pasted per keypoint.
        r = 6 * sigma + 3
        x = np.arange(0, r, 1, np.float32)
        y = x[:, None]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, records):
        """Render per-scale gaussian heatmaps from 'gt_joints' and rename
        'mask' into per-scale 'mask_{i}x' entries."""
        kpts_lst = records['gt_joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
            kpts = kpts_lst[idx]
            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
            inds = np.where(kpts[..., 2] > 0)
            # integer pixel coords of all visible keypoints at this scale
            visible = kpts[inds].astype(np.int64)[..., :2]
            # source (gaussian patch) and destination (heatmap) windows,
            # clipped so keypoints near the border paste only the overlap
            ul = np.round(visible - 3 * self.sigma - 1)
            br = np.round(visible + 3 * self.sigma + 2)
            sul = np.maximum(0, -ul)
            sbr = np.minimum(hmsize, br) - ul
            dul = np.clip(ul, 0, hmsize - 1)
            dbr = np.clip(br, 0, hmsize)
            for i in range(len(visible)):
                # skip keypoints whose center falls outside the heatmap
                if visible[i][0] < 0 or visible[i][1] < 0 or visible[i][
                        0] >= hmsize or visible[i][1] >= hmsize:
                    continue
                dx1, dy1 = dul[i]
                dx2, dy2 = dbr[i]
                sx1, sy1 = sul[i]
                sx2, sy2 = sbr[i]
                # take the max so overlapping people keep the strongest peak
                heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
                    self.gaussian[sy1:sy2, sx1:sx2],
                    heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
            records['mask_{}x'.format(idx + 1)] = mask
        del records['mask']
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class RandomFlipHalfBodyTransform(object):
    """apply data augment to image and coords
    to achieve the flip, scale, rotate and half body transform effect for training image

    Args:
        trainsize (list):[w, h], Image target size
        upper_body_ids (list): The upper body joint ids
        flip_pairs (list): The left-right joints exchange order list
        pixel_std (int): The pixel std of the scale
        scale (float): The scale factor to transform the image
        rot (int): The rotate factor to transform the image
        num_joints_half_body (int): The joints threshold of the half body transform
        prob_half_body (float): The threshold of the half body transform
        flip (bool): Whether to flip the image
        rot_prob (float): The probability of applying the random rotation

    Returns:
        records(dict): contain the image and coords after tranformed

    """

    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6):
        super(RandomFlipHalfBodyTransform, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        # target w/h ratio used to pad the half-body crop box
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob

    def halfbody_transform(self, joints, joints_vis):
        """Compute a (center, scale) crop covering only the upper or lower
        body joints; returns (None, None) when too few joints qualify."""
        upper_joints = []
        lower_joints = []
        # split visible joints into upper/lower body groups
        for joint_id in range(joints.shape[0]):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        # NOTE(review): randn() < 0.5 is not a 50/50 coin (true ~69% of the
        # time); this matches the upstream HRNet code — confirm intended.
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints if len(
                lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        # pad the tight joint box out to the training aspect ratio
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        # enlarge the crop so the selected joints are not at the border
        scale = scale * 1.5

        return center, scale

    def flip_joints(self, joints, joints_vis, width, matched_parts):
        """Mirror joint x-coords and swap each left/right joint pair."""
        joints[:, 0] = width - joints[:, 0] - 1
        for pair in matched_parts:
            joints[pair[0], :], joints[pair[1], :] = \
                joints[pair[1], :], joints[pair[0], :].copy()
            joints_vis[pair[0], :], joints_vis[pair[1], :] = \
                joints_vis[pair[1], :], joints_vis[pair[0], :].copy()

        # zero out the coordinates of invisible joints
        return joints * joints_vis, joints_vis

    def __call__(self, records):
        image = records['image']
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']
        r = 0
        # optionally zoom the crop onto the upper/lower half of the body
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
                np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.halfbody_transform(joints,
                                                               joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body
        # random rescale always applied; rotation only with prob rot_prob
        sf = self.scale
        rf = self.rot
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2,
                    rf * 2) if np.random.random() <= self.rot_prob else 0

        if self.flip and np.random.random() <= 0.5:
            image = image[:, ::-1, :]
            joints, joints_vis = self.flip_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            # mirror the crop center as well
            c[0] = image.shape[1] - c[0] - 1
        records['image'] = image
        records['gt_joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
        records['rotate'] = r

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class AugmentationbyInformantionDropping(object):
    """AID: Augmentation by Informantion Dropping. Please refer
    to https://arxiv.org/abs/2008.07139

    Zeroes out circular patches around (jittered) visible keypoints so the
    model cannot rely on a single local appearance cue.

    Args:
        trainsize (list): [w, h] of the training input
        prob_cutout (float): The probability of the Cutout augmentation.
        offset_factor (float): Offset factor of cutout center.
        num_patch (int): Number of patches to be cutout.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self,
                 trainsize,
                 prob_cutout=0.0,
                 offset_factor=0.2,
                 num_patch=1):
        self.prob_cutout = prob_cutout
        self.offset_factor = offset_factor
        self.num_patch = num_patch
        self.trainsize = trainsize

    def _cutout(self, img, joints, joints_vis):
        """Zero out ``num_patch`` random circular regions of ``img``."""
        height, width, _ = img.shape
        flat = img.reshape((height * width, -1))
        # flattened pixel-grid coordinates matching `flat`'s row order
        grid_x, grid_y = np.meshgrid(np.arange(0, width), np.arange(0, height))
        grid_x = grid_x.reshape((-1, ))
        grid_y = grid_y.reshape((-1, ))
        for _ in range(self.num_patch):
            visible_ids, _ = np.where(joints_vis > 0)
            chosen_joint = np.random.choice(visible_ids)
            # jitter the patch center away from the chosen joint
            center = joints[chosen_joint, 0:2] + np.random.randn(
                2) * self.trainsize[0] * self.offset_factor
            radius = np.random.uniform(0.1, 0.2) * self.trainsize[0]
            dist = ((center[0] - grid_x) / radius)**2 + (
                (center[1] - grid_y) / radius)**2
            inside = np.where((dist <= 1) & (dist >= 0))[0]
            flat[inside, :] = 0
        return flat.reshape((height, width, -1))

    def __call__(self, records):
        img = records['image']
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        if np.random.rand() < self.prob_cutout:
            img = self._cutout(img, joints, joints_vis)
        records['image'] = img
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class TopDownAffine(object):
    """apply affine transform to image and coords

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        image = records['image']
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        rot = records.get('rotate', 0)
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))

        if self.use_udp:
            # UDP keeps the pixel grid unbiased (size - 1 convention)
            trans = get_warp_matrix(
                rot, records['center'] * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['scale'] * 200.0)
            image = cv2.warpAffine(
                image, trans, out_size, flags=cv2.INTER_LINEAR)
            joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(), trans)
        else:
            trans = get_affine_transform(records['center'],
                                         records['scale'] * 200, rot,
                                         self.trainsize)
            image = cv2.warpAffine(
                image, trans, out_size, flags=cv2.INTER_LINEAR)
            # only transform joints that are marked visible
            for joint_idx in range(joints.shape[0]):
                if joints_vis[joint_idx, 0] > 0.0:
                    joints[joint_idx, 0:2] = affine_transform(
                        joints[joint_idx, 0:2], trans)

        records['image'] = image
        records['gt_joints'] = joints

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class SinglePoseAffine(object):
    """apply affine transform to image and coords

    Args:
        trainsize (list): [w, h], the standard size used to train
        rotate (list[2]): [probability, max degree] of the random rotation
        scale (list[2]): [probability, ratio] of the random rescale
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed

    """

    def __init__(self,
                 trainsize,
                 rotate=[1.0, 30],
                 scale=[1.0, 0.25],
                 use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp
        self.rot_prob = rotate[0]
        self.rot_range = rotate[1]
        self.scale_prob = scale[0]
        self.scale_ratio = scale[1]

    def __call__(self, records):
        image = records['image']
        if 'joints_2d' in records:
            joints = records['joints_2d'] if 'joints_2d' in records else None
            # default to all-visible when no visibility info is provided
            joints_vis = records[
                'joints_vis'] if 'joints_vis' in records else np.ones(
                    (len(joints), 1))
        rot = 0
        s = 1.
        # sample the random rotation / scale factors for this record
        if np.random.random() < self.rot_prob:
            rot = np.clip(np.random.randn() * self.rot_range,
                          -self.rot_range * 2, self.rot_range * 2)
        if np.random.random() < self.scale_prob:
            s = np.clip(np.random.randn() * self.scale_ratio + 1,
                        1 - self.scale_ratio, 1 + self.scale_ratio)

        if self.use_udp:
            # UDP uses the (size - 1) convention to stay pixel-unbiased
            trans = get_warp_matrix(
                rot,
                np.array(records['bbox_center']) * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['bbox_scale'] * 200.0 * s)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
            if 'joints_2d' in records:
                joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(),
                                                    trans)
        else:
            trans = get_affine_transform(
                np.array(records['bbox_center']),
                records['bbox_scale'] * s * 200, rot, self.trainsize)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
            if 'joints_2d' in records:
                # only transform joints that are marked visible
                for i in range(len(joints)):
                    if joints_vis[i, 0] > 0.0:
                        joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        if 'joints_3d' in records:
            pose3d = records['joints_3d']
            if not rot == 0:
                # rotate the 3D pose about the z-axis so it stays
                # consistent with the in-plane 2D image rotation
                trans_3djoints = np.eye(3)
                rot_rad = -rot * np.pi / 180
                sn, cs = np.sin(rot_rad), np.cos(rot_rad)
                trans_3djoints[0, :2] = [cs, -sn]
                trans_3djoints[1, :2] = [sn, cs]
                pose3d[:, :3] = np.einsum('ij,kj->ki', trans_3djoints,
                                          pose3d[:, :3])
                records['joints_3d'] = pose3d

        records['image'] = image
        if 'joints_2d' in records:
            records['joints_2d'] = joints

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class NoiseJitter(object):
    """apply NoiseJitter to image

    Each channel is multiplied by an independent random factor drawn from
    [1 - noise_factor, 1 + noise_factor], then clamped to [0, 255].

    Args:
        noise_factor (float): the noise factor ratio used to generate the jitter

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, noise_factor=0.4):
        self.noise_factor = noise_factor

    def __call__(self, records):
        # per-channel multiplicative jitter factors
        self.pn = np.random.uniform(1 - self.noise_factor,
                                    1 + self.noise_factor, 3)
        img = records['image']
        for channel in range(3):
            jittered = img[:, :, channel] * self.pn[channel]
            img[:, :, channel] = np.clip(jittered, 0.0, 255.0)
        records['image'] = img
        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class FlipPose(object):
    """random apply flip to image

    Args:
        flip_prob (float): probability of mirroring the image
        img_res (int): input resolution, used to mirror the 2D x-coords
        num_joints (int): 14 or 24, selects the left/right joint permutation

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, flip_prob=0.5, img_res=224, num_joints=14):
        self.flip_pob = flip_prob
        self.img_res = img_res
        if num_joints == 24:
            self.perm = [
                5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17,
                18, 19, 21, 20, 23, 22
            ]
        elif num_joints == 14:
            self.perm = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
        else:
            print("error num_joints in flip :{}".format(num_joints))

    def __call__(self, records):

        if np.random.random() >= self.flip_pob:
            return records

        records['image'] = np.fliplr(records['image'])

        if 'joints_2d' in records:
            # swap left/right joints, then mirror x in image coords
            flipped_2d = records['joints_2d'][self.perm]
            flipped_2d[:, 0] = self.img_res - flipped_2d[:, 0]
            records['joints_2d'] = flipped_2d

        if 'joints_3d' in records:
            # swap left/right joints, then negate x in metric coords
            flipped_3d = records['joints_3d'][self.perm]
            flipped_3d[:, 0] = -flipped_3d[:, 0]
            records['joints_3d'] = flipped_3d

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class TopDownEvalAffine(object):
    """apply affine transform to image and coords

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        rot = 0
        # im_shape is stored as [h, w]; reverse it to [w, h]
        imshape = records['im_shape'][::-1]
        center = imshape / 2.
        scale = imshape
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))

        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
        else:
            trans = get_affine_transform(center, scale, rot, self.trainsize)
        records['image'] = cv2.warpAffine(
            records['image'], trans, out_size, flags=cv2.INTER_LINEAR)

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmapsTopDown(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of gaussin kernel genereted
        records(dict): the dict contained the image and coords

    Returns:
        records (dict): contain the heatmaps used to heatmaploss

    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        """refer to
        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
        Copyright (c) Microsoft, under the MIT License.
        """
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        # image size as [w, h]; maps joint coords into heatmap coords
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        tmp_size = self.sigma * 3
        feat_stride = image_size / self.hmsize
        for joint_id in range(num_joints):
            # joint center in heatmap pixel coords (rounded)
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)

            # gaussian patch bounds (upper-left / bottom-right, exclusive)
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            # patch entirely outside the heatmap: mark the joint unusable
            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
                    0] < 0 or br[1] < 0:

                target_weight[joint_id] = 0
                continue

            # build a (2*tmp_size+1)^2 gaussian centered in its own window
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2

            g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * self.sigma**2))

            # overlap ranges: g_x/g_y index the patch, img_x/img_y the map
            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]

            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])

            v = target_weight[joint_id]
            if v > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[
                    0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
        del records['gt_joints'], records['joints_vis']

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmapsTopDown_DARK(object):
    """Generate gaussian heatmaps of keypoints for the heatmap loss
    (DARK variant: the gaussian keeps the sub-pixel joint center instead
    of quantizing it to the grid, which avoids the rounding bias).

    Args:
        hmsize (list): [w, h] output heatmap's size.
        sigma (float): the std of the gaussian kernel generated.

    Returns:
        records (dict): contains the heatmaps used by the heatmap loss.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_DARK, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['gt_joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        tmp_size = self.sigma * 3
        feat_stride = image_size / self.hmsize
        # The full-heatmap coordinate grids are the same for every joint;
        # build them once instead of inside the per-joint loop (hoisted
        # loop-invariant work).
        x = np.arange(0, self.hmsize[0], 1, np.float32)
        y = np.arange(0, self.hmsize[1], 1, np.float32)
        y = y[:, np.newaxis]
        for joint_id in range(num_joints):
            # Sub-pixel joint center on the heatmap grid (not rounded).
            mu_x = joints[joint_id][0] / feat_stride[0]
            mu_y = joints[joint_id][1] / feat_stride[1]

            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or br[
                    0] < 0 or br[1] < 0:
                # Gaussian window falls completely outside the heatmap.
                target_weight[joint_id] = 0
                continue

            v = target_weight[joint_id]
            if v > 0.5:
                target[joint_id] = np.exp(-(
                    (x - mu_x)**2 + (y - mu_y)**2) / (2 * self.sigma**2))
        records['target'] = target
        records['target_weight'] = target_weight
        del records['gt_joints'], records['joints_vis']

        return records
|
|
|
|
|
|
|
|
@register_keypointop |
|
|
class ToHeatmapsTopDown_UDP(object):
    """This code is based on:
    https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py

    Generate the gaussian heatmaps of keypoints for the heatmap loss.
    ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).

    Args:
        hmsize (list): [w, h] output heatmap's size.
        sigma (float): the std of the gaussian kernel generated.

    Returns:
        records (dict): contains the heatmaps used by the heatmap loss.
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_UDP, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        kpts = records['gt_joints']
        kpts_vis = records['joints_vis']
        njoints = kpts.shape[0]
        img_wh = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        weights = np.ones((njoints, 1), dtype=np.float32)
        weights[:, 0] = kpts_vis[:, 0]
        heatmaps = np.zeros(
            (njoints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        radius = self.sigma * 3
        # Kernel-window coordinate grids, shared by every joint.
        win = 2 * radius + 1
        xs = np.arange(0, win, 1, np.float32)
        ys = xs[:, None]
        # UDP stride: map [0, W-1] pixels onto [0, w-1] heatmap cells.
        stride = (img_wh - 1.0) / (self.hmsize - 1.0)
        for idx in range(njoints):
            # Rounded joint center on the heatmap grid.
            cx = int(kpts[idx][0] / stride[0] + 0.5)
            cy = int(kpts[idx][1] / stride[1] + 0.5)

            left_top = [int(cx - radius), int(cy - radius)]
            right_bottom = [int(cx + radius + 1), int(cy + radius + 1)]
            outside = (left_top[0] >= self.hmsize[0] or
                       left_top[1] >= self.hmsize[1] or
                       right_bottom[0] < 0 or right_bottom[1] < 0)
            if outside:
                weights[idx] = 0
                continue

            # Shift the kernel center by the quantization residual so the
            # gaussian peaks at the exact (sub-pixel) joint location.
            cx_exact = kpts[idx][0] / stride[0]
            cy_exact = kpts[idx][1] / stride[1]
            gx0 = win // 2 + (cx_exact - cx)
            gy0 = win // 2 + (cy_exact - cy)
            patch = np.exp(-((xs - gx0)**2 + (ys - gy0)**2) /
                           (2 * self.sigma**2))

            # Clip the kernel window against the heatmap borders.
            src_x = max(0, -left_top[0]), min(right_bottom[0],
                                              self.hmsize[0]) - left_top[0]
            src_y = max(0, -left_top[1]), min(right_bottom[1],
                                              self.hmsize[1]) - left_top[1]
            dst_x = max(0, left_top[0]), min(right_bottom[0], self.hmsize[0])
            dst_y = max(0, left_top[1]), min(right_bottom[1], self.hmsize[1])

            if weights[idx] > 0.5:
                heatmaps[idx][dst_y[0]:dst_y[1], dst_x[0]:dst_x[1]] = \
                    patch[src_y[0]:src_y[1], src_x[0]:src_x[1]]
        records['target'] = heatmaps
        records['target_weight'] = weights
        del records['gt_joints'], records['joints_vis']

        return records
|
|
|
|
|
|
|
|
from typing import Optional, Tuple, Union, List |
|
|
import numbers |
|
|
|
|
|
|
|
|
def _scale_size(
        size: Tuple[int, int],
        scale: Union[float, int, tuple], ) -> Tuple[int, int]:
    """Rescale a (w, h) size by a ratio.

    Args:
        size (tuple[int]): (w, h).
        scale (float | tuple(float)): Scaling factor; a scalar is applied
            to both dimensions.

    Returns:
        tuple[int]: scaled size, rounded to the nearest integer.
    """
    if isinstance(scale, (float, int)):
        scale = (scale, scale)
    width, height = size
    scaled_w = int(width * float(scale[0]) + 0.5)
    scaled_h = int(height * float(scale[1]) + 0.5)
    return scaled_w, scaled_h
|
|
|
|
|
|
|
|
def rescale_size(old_size: tuple,
                 scale: Union[float, int, tuple],
                 return_scale: bool=False) -> tuple:
    """Calculate the new size to be rescaled to.

    Args:
        old_size (tuple[int]): The old size (w, h) of image.
        scale (float | tuple[int] | list[int]): The scaling factor or maximum
            size. If it is a number, then the image will be rescaled by this
            factor, else if it is a tuple/list of 2 integers, then the image
            will be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image size.

    Returns:
        tuple[int]: The new rescaled image size (followed by the scale
        factor when ``return_scale`` is True).

    Raises:
        ValueError: If a numeric ``scale`` is not positive.
        TypeError: If ``scale`` is neither a number nor a tuple/list.
    """
    w, h = old_size
    if isinstance(scale, (float, int)):
        if scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')
        scale_factor = scale
    elif isinstance(scale, (list, tuple)):
        # fix: callers such as PETR_Resize.random_sample()/random_sample_ratio()
        # produce tuple scales, but previously only list was accepted, so
        # tuples raised TypeError despite the documented contract.
        max_long_edge = max(scale)
        max_short_edge = min(scale)
        # Largest factor that keeps both edges within the (long, short) bound.
        scale_factor = min(max_long_edge / max(h, w),
                           max_short_edge / min(h, w))
    else:
        raise TypeError(
            f'Scale must be a number or tuple of int, but got {type(scale)}')

    new_size = _scale_size((w, h), scale_factor)

    if return_scale:
        return new_size, scale_factor
    else:
        return new_size
|
|
|
|
|
|
|
|
def imrescale(img: np.ndarray,
              scale: Union[float, Tuple[int, int]],
              return_scale: bool=False,
              interpolation: str='bilinear',
              backend: Optional[str]=None) -> Union[np.ndarray, Tuple[
                  np.ndarray, float]]:
    """Resize an image while keeping its aspect ratio.

    Args:
        img (ndarray): The input image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            If it is a float number, then the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, then the image will
            be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image.
        interpolation (str): Same as :func:`resize`.
        backend (str | None): Same as :func:`resize`.

    Returns:
        ndarray: The rescaled image (followed by the scale factor when
        ``return_scale`` is True).
    """
    height, width = img.shape[:2]
    # Compute the aspect-preserving target size first, then resize once.
    target_size, ratio = rescale_size((width, height), scale, return_scale=True)
    out_img = imresize(
        img, target_size, interpolation=interpolation, backend=backend)
    return (out_img, ratio) if return_scale else out_img
|
|
|
|
|
|
|
|
def imresize(
        img: np.ndarray,
        size: Tuple[int, int],
        return_scale: bool=False,
        interpolation: str='bilinear',
        out: Optional[np.ndarray]=None,
        backend: Optional[str]=None,
        interp=cv2.INTER_LINEAR, ) -> Union[Tuple[np.ndarray, float, float],
                                            np.ndarray]:
    """Resize image to a given size.

    Args:
        img (ndarray): The input image.
        size (tuple[int]): Target size (w, h).
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        out (ndarray): The output destination.
        backend (str | None): The image resize backend type. Options are `cv2`,
            `pillow`, `None`. If backend is None, the global imread_backend
            specified by ``mmcv.use_backend()`` will be used. Default: None.
        interp: cv2 interpolation flag actually passed to ``cv2.resize``
            (used only by the 'cv2' backend).

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    h, w = img.shape[:2]
    if backend is None:
        # NOTE(review): `imread_backend` is not defined anywhere in this
        # module's visible scope — presumably a module-level global set
        # elsewhere; confirm, otherwise backend=None raises NameError.
        backend = imread_backend
    if backend not in ['cv2', 'pillow']:
        # fix: the two f-string fragments were concatenated without a space
        # ("...resize.Supported...").
        raise ValueError(f'backend: {backend} is not supported for resize. '
                         f"Supported backends are 'cv2', 'pillow'")

    if backend == 'pillow':
        assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
        # NOTE(review): `Image` and `pillow_interp_codes` are also not
        # defined in this module's visible scope — verify they are imported
        # at file level before using the 'pillow' backend.
        pil_image = Image.fromarray(img)
        pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
        resized_img = np.array(pil_image)
    else:
        resized_img = cv2.resize(img, size, dst=out, interpolation=interp)
    if not return_scale:
        return resized_img
    else:
        w_scale = size[0] / w
        h_scale = size[1] / h
        return resized_img, w_scale, h_scale
|
|
|
|
|
|
|
|
class PETR_Resize:
    """Resize images & bbox & mask.

    This transform resizes the input image to some scale. Bboxes and masks are
    then resized with the same scale factor. If the input dict contains the key
    "scale", then the scale in the input dict is used, otherwise the specified
    scale in the init method is used. If the input dict contains the key
    "scale_factor" (if MultiScaleFlipAug does not give img_scale but
    scale_factor), the actual scale will be computed by image shape and
    scale_factor.

    `img_scale` can either be a tuple (single-scale) or a list of tuple
    (multi-scale). There are 3 multiscale modes:

    - ``ratio_range is not None``: randomly sample a ratio from the ratio \
      range and multiply it with the image scale.
    - ``ratio_range is None`` and ``multiscale_mode == "range"``: randomly \
      sample a scale from the multiscale range.
    - ``ratio_range is None`` and ``multiscale_mode == "value"``: randomly \
      sample a scale from multiple scales.

    Args:
        img_scale (tuple or list[tuple]): Images scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio)
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image.
        bbox_clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. In some dataset like MOT17, the gt bboxes
            are allowed to cross the border of images. Therefore, we don't
            need to clip the gt bboxes in these cases. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generates slightly different results. Defaults
            to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        override (bool, optional): Whether to override `scale` and
            `scale_factor` so as to call resize twice. Default False. If True,
            after the first resizing, the existed `scale` and `scale_factor`
            will be ignored so the second resizing can be allowed.
            This option is a work-around for multiple times of resize in DETR.
            Defaults to False.
        keypoint_clip_border (bool, optional): Whether to clip keypoints
            outside the border of the image. Defaults to True.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=True,
                 bbox_clip_border=True,
                 backend='cv2',
                 interpolation='bilinear',
                 override=False,
                 keypoint_clip_border=True):
        if img_scale is None:
            self.img_scale = None
        else:
            # Normalize a single scale into a one-element list.
            if isinstance(img_scale, list):
                self.img_scale = img_scale
            else:
                self.img_scale = [img_scale]
            assert isinstance(self.img_scale, list)

        if ratio_range is not None:
            # mode 1: given a base scale and a range of image ratios
            assert len(self.img_scale) == 1
        else:
            # mode 2: given multiple scales or a range of scales
            assert multiscale_mode in ['value', 'range']

        self.backend = backend
        self.multiscale_mode = multiscale_mode
        self.ratio_range = ratio_range
        self.keep_ratio = keep_ratio
        self.interpolation = interpolation
        self.override = override
        self.bbox_clip_border = bbox_clip_border
        self.keypoint_clip_border = keypoint_clip_border

    @staticmethod
    def random_select(img_scales):
        """Randomly select an img_scale from given candidates.

        Args:
            img_scales (list[tuple]): Images scales for selection.

        Returns:
            (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, \
                where ``img_scale`` is the selected image scale and \
                ``scale_idx`` is the selected index in the given candidates.
        """

        assert isinstance(img_scales, list)
        scale_idx = np.random.randint(len(img_scales))
        img_scale = img_scales[scale_idx]
        return img_scale, scale_idx

    @staticmethod
    def random_sample(img_scales):
        """Randomly sample an img_scale when ``multiscale_mode=='range'``.

        Args:
            img_scales (list[tuple]): Images scale range for sampling.
                There must be two tuples in img_scales, which specify the lower
                and upper bound of image scales.

        Returns:
            (tuple, None): Returns a tuple ``(img_scale, None)``, where \
                ``img_scale`` is sampled scale and None is just a placeholder \
                to be consistent with :func:`random_select`.
        """

        assert isinstance(img_scales, list) and len(img_scales) == 2
        img_scale_long = [max(s) for s in img_scales]
        img_scale_short = [min(s) for s in img_scales]
        long_edge = np.random.randint(
            min(img_scale_long), max(img_scale_long) + 1)
        short_edge = np.random.randint(
            min(img_scale_short), max(img_scale_short) + 1)
        img_scale = (long_edge, short_edge)
        return img_scale, None

    @staticmethod
    def random_sample_ratio(img_scale, ratio_range):
        """Randomly sample an img_scale when ``ratio_range`` is specified.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. Then it would be multiplied with ``img_scale`` to
        generate sampled scale.

        Args:
            img_scale (list | tuple): Images scale base to multiply with ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to scale
                the ``img_scale``.

        Returns:
            (tuple, None): Returns a tuple ``(scale, None)``, where \
                ``scale`` is sampled ratio multiplied with ``img_scale`` and \
                None is just a placeholder to be consistent with \
                :func:`random_select`.
        """

        # fix: the class docstring allows img_scale to be a tuple, but this
        # assert previously rejected tuples (list only).
        assert isinstance(img_scale, (list, tuple)) and len(img_scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
        return scale, None

    def _random_scale(self, results):
        """Randomly sample an img_scale according to ``ratio_range`` and
        ``multiscale_mode``.

        If ``ratio_range`` is specified, a ratio will be sampled and be
        multiplied with ``img_scale``.
        If multiple scales are specified by ``img_scale``, a scale will be
        sampled according to ``multiscale_mode``.
        Otherwise, single scale will be used.

        Args:
            results (dict): Result dict from :obj:`dataset`.

        Returns:
            dict: Two new keys 'scale` and 'scale_idx` are added into \
                ``results``, which would be used by subsequent pipelines.
        """

        if self.ratio_range is not None:
            scale, scale_idx = self.random_sample_ratio(self.img_scale[0],
                                                        self.ratio_range)
        elif len(self.img_scale) == 1:
            scale, scale_idx = self.img_scale[0], 0
        elif self.multiscale_mode == 'range':
            scale, scale_idx = self.random_sample(self.img_scale)
        elif self.multiscale_mode == 'value':
            scale, scale_idx = self.random_select(self.img_scale)
        else:
            raise NotImplementedError
        results['scale'] = scale
        results['scale_idx'] = scale_idx

    def _resize_img(self, results):
        """Resize images with ``results['scale']``."""
        for key in ['image'] if 'image' in results else []:
            if self.keep_ratio:
                img, scale_factor = imrescale(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    interpolation=self.interpolation,
                    backend=self.backend)

                # imrescale returns one uniform factor; recover the exact
                # per-axis factors from the rounded output size.
                new_h, new_w = img.shape[:2]
                h, w = results[key].shape[:2]
                w_scale = new_w / w
                h_scale = new_h / h
            else:
                img, w_scale, h_scale = imresize(
                    results[key],
                    results['scale'],
                    return_scale=True,
                    interpolation=self.interpolation,
                    backend=self.backend)

            scale_factor = np.array(
                [w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
            results['im_shape'] = np.array(img.shape)
            # in case that there is no padding
            results['pad_shape'] = img.shape
            results['scale_factor'] = scale_factor
            results['keep_ratio'] = self.keep_ratio

            results[key] = img

    def _resize_bboxes(self, results):
        """Resize bounding boxes with ``results['scale_factor']``."""
        for key in ['gt_bbox'] if 'gt_bbox' in results else []:
            bboxes = results[key] * results['scale_factor']
            if self.bbox_clip_border:
                img_shape = results['im_shape']
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
            results[key] = bboxes

    def _resize_masks(self, results):
        """Resize masks with ``results['scale']``"""
        for key in ['mask'] if 'mask' in results else []:
            if results[key] is None:
                continue
            if self.keep_ratio:
                results[key] = results[key].rescale(results['scale'])
            else:
                results[key] = results[key].resize(results['im_shape'][:2])

    def _resize_seg(self, results):
        """Resize semantic segmentation map with ``results['scale']``."""
        for key in ['seg'] if 'seg' in results else []:
            # nearest interpolation keeps label values intact
            if self.keep_ratio:
                gt_seg = imrescale(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = imresize(
                    results[key],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results[key] = gt_seg

    def _resize_keypoints(self, results):
        """Resize keypoints with ``results['scale_factor']``."""
        for key in ['gt_joints'] if 'gt_joints' in results else []:
            keypoints = results[key].copy()
            keypoints[..., 0] = keypoints[..., 0] * results['scale_factor'][0]
            keypoints[..., 1] = keypoints[..., 1] * results['scale_factor'][1]
            if self.keypoint_clip_border:
                img_shape = results['im_shape']
                keypoints[..., 0] = np.clip(keypoints[..., 0], 0, img_shape[1])
                keypoints[..., 1] = np.clip(keypoints[..., 1], 0, img_shape[0])
            results[key] = keypoints

    def _resize_areas(self, results):
        """Resize mask areas with ``results['scale_factor']``."""
        for key in ['gt_areas'] if 'gt_areas' in results else []:
            areas = results[key].copy()
            # area scales with the product of both axis factors
            areas = areas * results['scale_factor'][0] * results[
                'scale_factor'][1]
            results[key] = areas

    def __call__(self, results):
        """Call function to resize images, bounding boxes, masks, semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, 'im_shape', 'pad_shape', 'scale_factor', \
                'keep_ratio' keys are added into result dict.
        """
        if 'scale' not in results:
            if 'scale_factor' in results:
                img_shape = results['image'].shape[:2]
                scale_factor = results['scale_factor'][0]
                # derive the absolute scale (w, h) from shape * factor
                results['scale'] = [int(x * scale_factor)
                                    for x in img_shape][::-1]
            else:
                self._random_scale(results)
        else:
            if not self.override:
                assert 'scale_factor' not in results, (
                    'scale and scale_factor cannot be both set.')
            else:
                results.pop('scale')
                if 'scale_factor' in results:
                    results.pop('scale_factor')
                self._random_scale(results)

        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_masks(results)
        self._resize_seg(results)
        self._resize_keypoints(results)
        self._resize_areas(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(img_scale={self.img_scale}, '
        repr_str += f'multiscale_mode={self.multiscale_mode}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        # fix: previously this line closed the paren and the next line
        # appended another field plus a second ')', yielding
        # '...bbox_clip_border=True)keypoint_clip_border=True)'.
        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
        repr_str += f'keypoint_clip_border={self.keypoint_clip_border})'
        return repr_str
|
|
|