|
|
""" |
|
|
This file contains functions that are used to perform data augmentation. |
|
|
""" |
|
|
import torch |
|
|
import numpy as np |
|
|
from skimage.transform import rotate, resize |
|
|
import cv2 |
|
|
from torchvision.transforms import Normalize, ToTensor, Compose |
|
|
|
|
|
from lib.core import constants |
|
|
|
|
|
def get_normalization():
    """Build the preprocessing pipeline applied to input images.

    Returns:
        A torchvision ``Compose`` that applies ``ToTensor`` followed by
        ``Normalize`` using ``constants.IMG_NORM_MEAN`` and
        ``constants.IMG_NORM_STD``.
    """
    steps = [
        ToTensor(),
        Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD),
    ]
    return Compose(steps)
|
|
|
|
|
def get_transform(center, scale, res, rot=0):
    """Build the 3x3 homogeneous matrix that maps image pixels into a crop.

    Args:
        center: Box centre as ``(x, y)``.
        scale: Box scale; the box side length is taken as ``200 * scale``.
        res: Output resolution; ``res[0]`` is used for the y axis and
            ``res[1]`` for the x axis.
        rot: Rotation in degrees. When non-zero, a rotation about the centre
            of the output crop is composed onto the scaling/translation.

    Returns:
        A ``(3, 3)`` numpy array.
    """
    # Box side length in pixels; the small epsilon keeps the divisions below
    # finite when ``scale`` is zero.
    box = 200 * scale + 1e-6

    mat = np.zeros((3, 3))
    mat[0, 0] = float(res[1]) / box
    mat[1, 1] = float(res[0]) / box
    mat[0, 2] = res[1] * (-float(center[0]) / box + .5)
    mat[1, 2] = res[0] * (-float(center[1]) / box + .5)
    mat[2, 2] = 1
    if rot == 0:
        return mat

    # The sign of the angle is flipped before the rotation is built.
    angle = -rot * np.pi / 180
    sn, cs = np.sin(angle), np.cos(angle)
    spin = np.array([[cs, -sn, 0.],
                     [sn, cs, 0.],
                     [0., 0., 1.]])

    # Rotate about the crop centre: shift it to the origin, rotate, shift back.
    to_origin = np.array([[1., 0., -res[1] / 2],
                          [0., 1., -res[0] / 2],
                          [0., 0., 1.]])
    from_origin = to_origin.copy()
    from_origin[:2, 2] = -to_origin[:2, 2]

    shifted = np.dot(to_origin, mat)
    rotated = np.dot(spin, shifted)
    return np.dot(from_origin, rotated)
|
|
|
|
|
def transform(pt, center, scale, res, invert=0, rot=0, asint=True):
    """Map one 1-indexed pixel location through the crop transform.

    Args:
        pt: Point as ``(x, y)``; 1 is subtracted from each coordinate before
            the matrix is applied and added back afterwards.
        center, scale, res, rot: Forwarded to ``get_transform``.
        invert: If truthy, the inverse of the matrix is applied instead.
        asint: If true, the result is cast to ``int`` before 1 is added back.

    Returns:
        A length-2 numpy array holding the transformed ``(x, y)``.
    """
    mat = get_transform(center, scale, res, rot=rot)
    if invert:
        mat = np.linalg.inv(mat)

    # Homogeneous, 0-indexed version of the input point.
    homog = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
    mapped = np.dot(mat, homog)[:2]

    return mapped.astype(int) + 1 if asint else mapped + 1
|
|
|
|
|
def transform_pts(pts, center, scale, res, invert=0, rot=0, asint=True):
    """Map a batch of pixel locations through the crop transform.

    Unlike ``transform``, no 1-pixel offset is removed or re-added here.

    Args:
        pts: 2-D array-like of points, one ``(x, y)`` row per point.
        center, scale, res, rot: Forwarded to ``get_transform``.
        invert: If truthy, the inverse of the matrix is applied instead.
        asint: If true, the result is cast to ``int``.

    Returns:
        Array with one transformed ``(x, y)`` row per input point.
    """
    mat = get_transform(center, scale, res, rot=rot)
    if invert:
        mat = np.linalg.inv(mat)

    # Append a column of ones to obtain homogeneous coordinates.
    ones_col = np.ones_like(pts)[:, :1]
    homog = np.concatenate((pts, ones_col), axis=-1)

    mapped = np.dot(mat, homog.T)[:2, :].T
    if asint:
        return mapped.astype(int)
    return mapped
|
|
|
|
|
def crop(img, center, scale, res, rot=0):
    """Crop the bounding-box region of ``img`` and resize it to ``res``.

    Parts of the box that fall outside the image are left as zeros. If the
    box cannot be copied at all (slice shapes do not match), a message is
    printed and the crop stays all zeros.

    Args:
        img: Image array, ``(H, W)`` or ``(H, W, C)``.
        center: Box centre as ``(x, y)``.
        scale: Box scale, as used by ``get_transform``.
        res: Output resolution passed to ``resize``.
        rot: Rotation in degrees applied to the cropped patch.

    Returns:
        The cropped (and optionally rotated) patch resized to ``res``.
    """
    # Upper-left and bottom-right corners of the box in image coordinates.
    ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
    br = np.array(transform([res[0] + 1, res[1] + 1],
                            center, scale, res, invert=1)) - 1

    # Extra margin so that enough context is available once rotated.
    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
    if rot != 0:
        ul -= pad
        br += pad

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)

    # Range to fill in the new array.
    new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
    # Range to sample from the original image.
    old_x = max(0, ul[0]), min(len(img[0]), br[0])
    old_y = max(0, ul[1]), min(len(img), br[1])
    try:
        new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
                                                            old_x[0]:old_x[1]]
    except ValueError:
        # Source and destination slices disagree in shape (e.g. the box lies
        # outside the image). Keep the best-effort behaviour of returning a
        # zero-filled crop, but no longer swallow KeyboardInterrupt/SystemExit.
        print("invlid bbox, fill with 0")

    if rot != 0:
        new_img = rotate(new_img, rot)
        # Remove the rotation margin. With pad == 0 the slice ``[0:-0]``
        # would be empty, so only trim when there is a margin to remove.
        if pad > 0:
            new_img = new_img[pad:-pad, pad:-pad]

    new_img = resize(new_img, res)
    return new_img
|
|
|
|
|
def crop_j2d(j2d, center, scale, res, rot=0):
    """Map 2D joint coordinates from image space into the crop's frame.

    Args:
        j2d: Array of 2D coordinates.
        center: Box centre; must support array arithmetic with a scalar.
        scale: Box scale; the box side length is ``scale * 200``.
        res: Crop resolution; only ``res[0]`` is used for the rescaling.
        rot: Accepted for signature parity with the crop helpers; not used.

    Returns:
        The coordinates shifted to the box's upper-left corner and rescaled
        by ``res[0] / (scale * 200)``.
    """
    box_size = scale * 200
    box_origin = center - box_size / 2
    zoom = res[0] / box_size
    return (j2d - box_origin) * zoom
|
|
|
|
|
|
|
|
def crop_crop(img, center, scale, res, rot=0):
    """Crop the bounding-box region of ``img`` and resize it to ``res``.

    Variant of ``crop`` that, when the crop canvas is taller than the image,
    starts from a border-replicated copy of the image instead of zeros.

    Args:
        img: Image array, ``(H, W)`` or ``(H, W, C)``.
        center: Box centre as ``(x, y)``.
        scale: Box scale, as used by ``get_transform``.
        res: Output resolution passed to ``resize``.
        rot: Rotation in degrees applied to the cropped patch.

    Returns:
        The cropped (and optionally rotated) patch resized to ``res``.
    """
    # Box corners in image coordinates.
    top_left = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
    bottom_right = np.array(
        transform([res[0] + 1, res[1] + 1], center, scale, res, invert=1)) - 1

    # Margin that keeps enough context available after rotating.
    half_diag = np.linalg.norm(bottom_right - top_left) / 2
    margin = int(half_diag - float(bottom_right[1] - top_left[1]) / 2)
    rotating = rot != 0
    if rotating:
        top_left -= margin
        bottom_right += margin

    x0, y0 = top_left[0], top_left[1]
    x1, y1 = bottom_right[0], bottom_right[1]

    canvas_shape = [y1 - y0, x1 - x0]
    if len(img.shape) > 2:
        canvas_shape.append(img.shape[2])
    canvas = np.zeros(canvas_shape)

    # If the canvas is taller than the image, replace it with the image
    # extended on every side by replicated border pixels.
    if canvas.shape[0] > img.shape[0]:
        border = int((canvas.shape[0] - img.shape[0]) / 2)
        canvas = cv2.copyMakeBorder(img, border, border, border, border,
                                    cv2.BORDER_REPLICATE)

    # Destination range inside the canvas.
    dst_x = max(0, -x0), min(x1, len(img[0])) - x0
    dst_y = max(0, -y0), min(y1, len(img)) - y0
    # Source range inside the original image.
    src_x = max(0, x0), min(len(img[0]), x1)
    src_y = max(0, y0), min(len(img), y1)
    patch = img[src_y[0]:src_y[1], src_x[0]:src_x[1]]
    canvas[dst_y[0]:dst_y[1], dst_x[0]:dst_x[1]] = patch

    if rotating:
        canvas = rotate(canvas, rot)
        # Strip the rotation margin again.
        canvas = canvas[margin:-margin, margin:-margin]

    return resize(canvas, res)
|
|
|
|
|
def uncrop(img, center, scale, orig_shape, rot=0, is_rgb=True):
    """'Undo' the image cropping/resizing.

    This function is used when evaluating mask/part segmentation. The crop
    is resized back to the size of its bounding box with nearest-neighbour
    sampling and pasted into a zero-filled ``uint8`` array of ``orig_shape``.

    Args:
        img: Cropped array whose first two dimensions are the crop resolution.
        center: Box centre as ``(x, y)``.
        scale: Box scale, as used by ``get_transform``.
        orig_shape: Shape of the array to paste the crop back into.
        rot: Accepted for interface compatibility; not used.
        is_rgb: Accepted for interface compatibility; not used.

    Returns:
        A ``uint8`` array of shape ``orig_shape``.
    """
    res = img.shape[:2]
    # Upper-left and bottom-right corners of the box in image coordinates.
    ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
    br = np.array(transform([res[0] + 1, res[1] + 1],
                            center, scale, res, invert=1)) - 1
    # Size of the box in the original image.
    crop_shape = [br[1] - ul[1], br[0] - ul[0]]

    new_img = np.zeros(orig_shape, dtype=np.uint8)

    # Range to read from the resized crop.
    new_x = max(0, -ul[0]), min(br[0], orig_shape[1]) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], orig_shape[0]) - ul[1]
    # Range to write in the full-size output.
    old_x = max(0, ul[0]), min(orig_shape[1], br[0])
    old_y = max(0, ul[1]), min(orig_shape[0], br[1])

    # skimage's ``resize`` has no ``interp`` keyword; nearest-neighbour is
    # requested with ``order=0``. ``preserve_range`` keeps the original
    # values and anti-aliasing is disabled so they are never blended.
    img = resize(img, crop_shape, order=0, preserve_range=True,
                 anti_aliasing=False)
    new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]] = img[new_y[0]:new_y[1],
                                                        new_x[0]:new_x[1]]
    return new_img
|
|
|
|
|
def rot_aa(aa, rot):
    """Rotate axis-angle parameters by ``-rot`` degrees about the z axis.

    Args:
        aa: Axis-angle vector accepted by ``cv2.Rodrigues``.
        rot: Rotation angle in degrees.

    Returns:
        The rotated axis-angle vector.
    """
    theta = np.deg2rad(-rot)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    z_rotation = np.array([[cos_t, -sin_t, 0],
                           [sin_t, cos_t, 0],
                           [0, 0, 1]])

    # Axis-angle -> rotation matrix.
    as_matrix, _ = cv2.Rodrigues(aa)
    # Apply the z rotation on the left, then convert back to axis-angle.
    rotated, _ = cv2.Rodrigues(np.dot(z_rotation, as_matrix))
    return rotated.T[0]
|
|
|
|
|
def flip_img(img):
    """Mirror an rgb image or mask left-to-right.

    Channels come last, e.g. (256,256,3).
    """
    return np.fliplr(img)
|
|
|
|
|
def flip_kp(kp):
    """Flip keypoints horizontally.

    The keypoints are reordered with the flip permutation matching their
    count and the first coordinate of every keypoint is negated.

    Args:
        kp: Keypoint array with 24 or 49 rows.

    Returns:
        The permuted keypoints with the sign of column 0 flipped.

    Raises:
        ValueError: If ``kp`` has neither 24 nor 49 keypoints.
    """
    num_kp = len(kp)
    if num_kp == 24:
        flipped_parts = constants.J24_FLIP_PERM
    elif num_kp == 49:
        flipped_parts = constants.J49_FLIP_PERM
    else:
        # Previously this fell through and failed later with an opaque
        # UnboundLocalError on ``flipped_parts``.
        raise ValueError(
            "flip_kp expects 24 or 49 keypoints, got {}".format(num_kp))
    # NOTE(review): presumably the permutation is an index list, so this
    # indexing yields a new array and the caller's input is not modified.
    kp = kp[flipped_parts]
    kp[:, 0] = -kp[:, 0]
    return kp
|
|
|
|
|
def flip_pose(pose):
    """Flip pose.

    The pose is reordered with ``constants.SMPL_POSE_FLIP_PERM`` and the
    second and third component of every consecutive triplet are negated.

    Args:
        pose: Flat array of pose parameters.

    Returns:
        The flipped pose.
    """
    mirrored = pose[constants.SMPL_POSE_FLIP_PERM]
    # Negate components 1 and 2 of each triplet.
    for component in (1, 2):
        mirrored[component::3] = -mirrored[component::3]
    return mirrored
|
|
|
|
|
|
|
|
def crop_img(img, center, scale, res, val=255):
    """Crop the bounding-box region of ``img`` and resize it to ``res``.

    Parts of the box that fall outside the image are filled with ``val``.

    Args:
        img: Image array, ``(H, W)`` or ``(H, W, C)``.
        center: Box centre as ``(x, y)``.
        scale: Box scale, as used by ``get_transform``.
        res: Output resolution passed to ``resize``.
        val: Fill value for regions outside the image.

    Returns:
        The cropped patch resized to ``res``.
    """
    # Box corners in image coordinates.
    top_left = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
    bottom_right = np.array(
        transform([res[0] + 1, res[1] + 1], center, scale, res, invert=1)) - 1
    x0, y0 = top_left[0], top_left[1]
    x1, y1 = bottom_right[0], bottom_right[1]

    canvas_shape = [y1 - y0, x1 - x0]
    if len(img.shape) > 2:
        canvas_shape.append(img.shape[2])
    canvas = np.ones(canvas_shape) * val

    # Destination range inside the canvas.
    dst_x = max(0, -x0), min(x1, len(img[0])) - x0
    dst_y = max(0, -y0), min(y1, len(img)) - y0
    # Source range inside the original image.
    src_x = max(0, x0), min(len(img[0]), x1)
    src_y = max(0, y0), min(len(img), y1)
    patch = img[src_y[0]:src_y[1], src_x[0]:src_x[1]]
    canvas[dst_y[0]:dst_y[1], dst_x[0]:dst_x[1]] = patch

    return resize(canvas, res)
|
|
|
|
|
|
|
|
def boxes_2_cs(boxes):
    """Convert a batch of ``(x1, y1, x2, y2)`` boxes to centres and scales.

    Args:
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.

    Returns:
        Tuple ``(centers, scales)``: ``centers`` stacks ``(cx, cy)`` per box
        and ``scales`` is the longer box side divided by 200.
    """
    left, top = boxes[:, 0], boxes[:, 1]
    right, bottom = boxes[:, 2], boxes[:, 3]
    width = right - left
    height = bottom - top

    centers = np.stack([left + width / 2, top + height / 2], axis=1)
    longest_side = np.stack([width, height]).max(axis=0)
    return centers, longest_side / 200
|
|
|
|
|
|
|
|
def box_2_cs(box):
    """Convert one ``(x1, y1, x2, y2, ...)`` box to a centre and a scale.

    The first four entries are converted to integers with ``.int()`` before
    use, so ``box`` must provide ``.int().tolist()`` (e.g. a torch tensor).

    Args:
        box: Box whose first four entries are ``x1, y1, x2, y2``.

    Returns:
        Tuple ``(center, scale)``: ``center`` is the list ``[cx, cy]`` and
        ``scale`` is the longer box side divided by 200.
    """
    left, top, right, bottom = box[:4].int().tolist()
    width = right - left
    height = bottom - top

    center = [left + width / 2, top + height / 2]
    scale = max(width, height) / 200
    return center, scale
|
|
|
|
|
|
|
|
def est_intrinsics(img_shape):
    """Estimate camera intrinsics from the image shape alone.

    Args:
        img_shape: Three-element shape ``(h, w, c)``.

    Returns:
        Tuple ``(img_center, img_focal)`` of float tensors: the centre
        ``(w / 2, h / 2)`` and the image diagonal ``sqrt(h**2 + w**2)``,
        which is used as the focal length.
    """
    height, width, _ = img_shape
    img_center = torch.tensor([width / 2., height / 2.]).float()
    diagonal = np.sqrt(height ** 2 + width ** 2)
    img_focal = torch.tensor(diagonal).float()
    return img_center, img_focal
|
|
|
|
|
|