Spaces:

microsoft
/

VITRA

Running on Zero

App Files Files Community

VITRA / thirdparty /HaWoR /lib /utils /imutils.py

arnoldland

Initial commit

aae3ba1 15 days ago

raw

history blame contribute delete

9.45 kB

	"""
	This file contains functions that are used to perform data augmentation.
	"""
	import torch
	import numpy as np
	from skimage.transform import rotate, resize
	import cv2
	from torchvision.transforms import Normalize, ToTensor, Compose

	from lib.core import constants

	def get_normalization():
	    """Build the image preprocessing pipeline.

	    Returns:
	        A torchvision ``Compose`` that applies ``ToTensor`` followed by
	        ``Normalize`` with ``constants.IMG_NORM_MEAN`` and
	        ``constants.IMG_NORM_STD``.
	    """
	    to_tensor = ToTensor()
	    standardize = Normalize(mean=constants.IMG_NORM_MEAN,
	                            std=constants.IMG_NORM_STD)
	    return Compose([to_tensor, standardize])

	def get_transform(center, scale, res, rot=0):
	    """Generate the 3x3 homogeneous matrix mapping image pixels to crop pixels.

	    The bounding box side is ``200 * scale`` pixels (plus a tiny epsilon so
	    the divisions below never hit zero).

	    Args:
	        center: box centre; ``center[0]`` feeds the first (x) row and
	            ``center[1]`` the second (y) row of the matrix.
	        scale: box size expressed in units of 200 pixels.
	        res: output resolution; ``res[1]`` scales the x row and ``res[0]``
	            the y row.
	        rot: rotation in degrees; ``0`` skips the rotation step entirely.

	    Returns:
	        A 3x3 ``numpy`` array.
	    """
	    box_size = 200 * scale + 1e-6
	    affine = np.zeros((3, 3))
	    affine[0, 0] = float(res[1]) / box_size
	    affine[1, 1] = float(res[0]) / box_size
	    affine[0, 2] = res[1] * (-float(center[0]) / box_size + .5)
	    affine[1, 2] = res[0] * (-float(center[1]) / box_size + .5)
	    affine[2, 2] = 1
	    if rot == 0:
	        return affine

	    # The angle is negated to match the direction of rotation from cropping
	    angle = -rot * np.pi / 180
	    sn, cs = np.sin(angle), np.cos(angle)
	    rotation = np.array([[cs, -sn, 0.],
	                         [sn, cs, 0.],
	                         [0., 0., 1.]])
	    # Rotate around the centre of the output: shift there, rotate, shift back
	    half_w, half_h = res[1] / 2, res[0] / 2
	    to_origin = np.array([[1., 0., -half_w],
	                          [0., 1., -half_h],
	                          [0., 0., 1.]])
	    from_origin = np.array([[1., 0., half_w],
	                            [0., 1., half_h],
	                            [0., 0., 1.]])
	    return np.dot(from_origin, np.dot(rotation, np.dot(to_origin, affine)))

	def transform(pt, center, scale, res, invert=0, rot=0, asint=True):
	    """Map a single 1-indexed pixel location into another reference frame.

	    Args:
	        pt: point, read as ``pt[0]`` and ``pt[1]``.
	        center, scale, res, rot: forwarded to ``get_transform``.
	        invert: when truthy the inverse matrix is applied instead.
	        asint: when truthy the result is truncated to ``int``.

	    Returns:
	        Array with the two transformed coordinates, 1-indexed.
	    """
	    mat = get_transform(center, scale, res, rot=rot)
	    if invert:
	        mat = np.linalg.inv(mat)
	    # Work in 0-indexed homogeneous coordinates, then shift back by one
	    homog = np.array([pt[0] - 1, pt[1] - 1, 1.])
	    mapped = np.dot(mat, homog)[:2]
	    return mapped.astype(int) + 1 if asint else mapped + 1

	def transform_pts(pts, center, scale, res, invert=0, rot=0, asint=True):
	    """Map a batch of pixel locations into another reference frame.

	    Unlike ``transform``, no +/-1 index shift is applied here.

	    Args:
	        pts: 2-D array with one point per row (indexed as ``pts[:, [0]]``).
	        center, scale, res, rot: forwarded to ``get_transform``.
	        invert: when truthy the inverse matrix is applied instead.
	        asint: when truthy the result is truncated to ``int``.

	    Returns:
	        Array with one transformed ``(x, y)`` row per input point.
	    """
	    mat = get_transform(center, scale, res, rot=rot)
	    if invert:
	        mat = np.linalg.inv(mat)
	    # Append a column of ones to obtain homogeneous coordinates
	    ones_col = np.ones_like(pts)[:, [0]]
	    homog = np.concatenate((pts, ones_col), axis=-1)
	    mapped = np.dot(mat, homog.T)[:2, :].T
	    if asint:
	        return mapped.astype(int)
	    return mapped

	def crop(img, center, scale, res, rot=0):
	    """Crop image according to the supplied bounding box.

	    The box corners are obtained by inverse-transforming the corners of the
	    output frame. Parts of the box that fall outside ``img`` stay zero.

	    Args:
	        img: image array; a third axis, if present, is carried over.
	        center, scale: bounding box description, forwarded to ``transform``.
	        res: output size, read as ``res[0]`` / ``res[1]`` and passed to
	            ``resize``.
	        rot: angle handed to ``rotate``; ``0`` disables rotation and padding.

	    Returns:
	        The cropped (optionally rotated) patch resized to ``res``.
	    """
	    # Upper left point
	    ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
	    # Bottom right point
	    br = np.array(transform([res[0] + 1, res[1] + 1],
	                            center, scale, res, invert=1)) - 1

	    # Padding so that when rotated proper amount of context is included
	    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
	    if not rot == 0:
	        ul -= pad
	        br += pad

	    new_shape = [br[1] - ul[1], br[0] - ul[0]]
	    if len(img.shape) > 2:
	        new_shape += [img.shape[2]]
	    new_img = np.zeros(new_shape)

	    # Range to fill new array
	    new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
	    new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
	    # Range to sample from original image
	    old_x = max(0, ul[0]), min(len(img[0]), br[0])
	    old_y = max(0, ul[1]), min(len(img), br[1])
	    try:
	        new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
	                                                            old_x[0]:old_x[1]]
	    except (ValueError, IndexError):
	        # Source and destination windows disagree (degenerate box): keep the
	        # zero-filled patch, but do not swallow KeyboardInterrupt/SystemExit.
	        print("invlid bbox, fill with 0")

	    if not rot == 0:
	        new_img = rotate(new_img, rot)
	        # Remove padding. Explicit end indices are used because
	        # ``[pad:-pad]`` would produce an empty array when pad == 0.
	        rows, cols = new_img.shape[:2]
	        new_img = new_img[pad:rows - pad, pad:cols - pad]

	    new_img = resize(new_img, res)
	    return new_img

	def crop_j2d(j2d, center, scale, res, rot=0):
	    """Map 2-D keypoints from image pixels into the cropped frame.

	    The bounding box is the square of side ``200 * scale`` around
	    ``center``; points are shifted so the box corner becomes the origin and
	    then scaled by ``res[0] / (200 * scale)``.

	    Args:
	        j2d: keypoint coordinates (array-like supporting broadcasting
	            arithmetic with ``center``).
	        center: box centre. A plain ``list``/``tuple`` (as returned by
	            ``box_2_cs``) is converted to an array first.
	        scale: box size in units of 200 pixels.
	        res: output resolution; only ``res[0]`` is used, for both axes.
	        rot: unused, kept for signature compatibility with ``crop``.

	    Returns:
	        The keypoints expressed in crop pixel coordinates.
	    """
	    if isinstance(center, (list, tuple)):
	        # Lists do not support ``center - b / 2``; promote them to arrays
	        center = np.asarray(center, dtype=float)
	    b = scale * 200
	    points2d = j2d - (center - b / 2)
	    points2d = points2d * (res[0] / b)

	    return points2d


	def crop_crop(img, center, scale, res, rot=0):
	    """Crop image according to the supplied bounding box.

	    Variant of ``crop``: when the crop canvas is taller than ``img`` the
	    canvas is replaced by ``img`` padded with replicated border pixels
	    before the in-bounds region is copied over.

	    Args:
	        img: image array; a third axis, if present, is carried over.
	        center, scale: bounding box description, forwarded to ``transform``.
	        res: output size, read as ``res[0]`` / ``res[1]`` and passed to
	            ``resize``.
	        rot: angle handed to ``rotate``; ``0`` disables rotation and padding.

	    Returns:
	        The cropped (optionally rotated) patch resized to ``res``.
	    """
	    # Upper left point
	    ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
	    # Bottom right point
	    br = np.array(transform([res[0] + 1, res[1] + 1],
	                            center, scale, res, invert=1)) - 1

	    # Padding so that when rotated proper amount of context is included
	    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
	    if not rot == 0:
	        ul -= pad
	        br += pad

	    new_shape = [br[1] - ul[1], br[0] - ul[0]]
	    if len(img.shape) > 2:
	        new_shape += [img.shape[2]]
	    new_img = np.zeros(new_shape)

	    if new_img.shape[0] > img.shape[0]:
	        # Canvas is taller than the image: start from the image grown by
	        # ``p`` replicated pixels on every side instead of from zeros.
	        # NOTE(review): the padded canvas need not match ``new_shape``.
	        p = (new_img.shape[0] - img.shape[0]) / 2
	        p = int(p)
	        new_img = cv2.copyMakeBorder(img, p, p, p, p, cv2.BORDER_REPLICATE)

	    # Range to fill new array
	    new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
	    new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
	    # Range to sample from original image
	    old_x = max(0, ul[0]), min(len(img[0]), br[0])
	    old_y = max(0, ul[1]), min(len(img), br[1])
	    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
	                                                        old_x[0]:old_x[1]]

	    if not rot == 0:
	        new_img = rotate(new_img, rot)
	        # Remove padding. Explicit end indices are used because
	        # ``[pad:-pad]`` would produce an empty array when pad == 0.
	        rows, cols = new_img.shape[:2]
	        new_img = new_img[pad:rows - pad, pad:cols - pad]

	    new_img = resize(new_img, res)
	    return new_img

	def uncrop(img, center, scale, orig_shape, rot=0, is_rgb=True):
	    """'Undo' the image cropping/resizing.

	    This function is used when evaluating mask/part segmentation.

	    Args:
	        img: cropped image; its first two dimensions are taken as the crop
	            resolution.
	        center, scale: bounding box description, forwarded to ``transform``.
	        orig_shape: shape of the output canvas; ``orig_shape[0]`` and
	            ``orig_shape[1]`` bound the rows and columns.
	        rot: unused.
	        is_rgb: unused.

	    Returns:
	        A ``uint8`` array of shape ``orig_shape`` that is zero outside the
	        box and holds the nearest-neighbour resized ``img`` inside it.
	    """
	    res = img.shape[:2]
	    # Upper left point
	    ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
	    # Bottom right point
	    br = np.array(transform([res[0] + 1, res[1] + 1],
	                            center, scale, res, invert=1)) - 1
	    # size of cropped image
	    crop_shape = [br[1] - ul[1], br[0] - ul[0]]

	    new_img = np.zeros(orig_shape, dtype=np.uint8)
	    # Range to fill new array
	    new_x = max(0, -ul[0]), min(br[0], orig_shape[1]) - ul[0]
	    new_y = max(0, -ul[1]), min(br[1], orig_shape[0]) - ul[1]
	    # Range to sample from original image
	    old_x = max(0, ul[0]), min(orig_shape[1], br[0])
	    old_y = max(0, ul[1]), min(orig_shape[0], br[1])
	    # skimage's resize has no ``interp`` keyword (that was scipy.misc.imresize);
	    # order=0 selects nearest-neighbour, and preserve_range keeps the input
	    # values (e.g. label ids) instead of rescaling them to [0, 1].
	    img = resize(img, crop_shape, order=0, preserve_range=True,
	                 anti_aliasing=False)
	    new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]] = img[new_y[0]:new_y[1],
	                                                        new_x[0]:new_x[1]]
	    return new_img

	def rot_aa(aa, rot):
	    """Rotate an axis-angle vector by ``-rot`` degrees about the z axis.

	    Args:
	        aa: axis-angle parameters, converted with ``cv2.Rodrigues``.
	        rot: rotation in degrees; its negative is applied.

	    Returns:
	        The rotated orientation as an axis-angle vector.
	    """
	    theta = np.deg2rad(-rot)
	    cos_t, sin_t = np.cos(theta), np.sin(theta)
	    # In-plane rotation matrix (z axis untouched)
	    plane_rot = np.array([[cos_t, -sin_t, 0],
	                          [sin_t, cos_t, 0],
	                          [0, 0, 1]])
	    # Axis-angle -> rotation matrix
	    orient_mat, _ = cv2.Rodrigues(aa)
	    # Pre-multiply by the in-plane rotation and convert back to axis-angle
	    rotated, _ = cv2.Rodrigues(np.dot(plane_rot, orient_mat))
	    return (rotated.T)[0]

	def flip_img(img):
	    """Mirror an rgb image or mask left-to-right.

	    Channels come last, e.g. (256,256,3).
	    """
	    return np.fliplr(img)

	def flip_kp(kp):
	    """Flip keypoints.

	    The rows are reordered with the flip permutation matching the number
	    of keypoints, then the first column is negated.

	    Args:
	        kp: keypoint array with 24 or 49 rows.

	    Returns:
	        A reordered copy of ``kp`` with its first column negated.

	    Raises:
	        ValueError: if ``kp`` has neither 24 nor 49 rows.
	    """
	    if len(kp) == 24:
	        flipped_parts = constants.J24_FLIP_PERM
	    elif len(kp) == 49:
	        flipped_parts = constants.J49_FLIP_PERM
	    else:
	        # Previously this fell through and failed with an unbound local
	        raise ValueError(
	            "flip_kp expects 24 or 49 keypoints, got {}".format(len(kp)))
	    kp = kp[flipped_parts]
	    kp[:, 0] = -kp[:, 0]
	    return kp

	def flip_pose(pose):
	    """Flip pose.

	    The flipping is based on SMPL parameters: entries are reordered with
	    ``constants.SMPL_POSE_FLIP_PERM`` and the second and third component of
	    every axis-angle triplet change sign.
	    """
	    mirrored = pose[constants.SMPL_POSE_FLIP_PERM]
	    # Negate the second and the third dimension of each axis-angle
	    mirrored[1::3] = -mirrored[1::3]
	    mirrored[2::3] = -mirrored[2::3]
	    return mirrored


	def crop_img(img, center, scale, res, val=255):
	    """Cut the bounding box out of ``img`` and resize it to ``res``.

	    Regions of the box lying outside the image are filled with ``val``.

	    Args:
	        img: image array; a third axis, if present, is carried over.
	        center, scale: bounding box description, forwarded to ``transform``.
	        res: output size, read as ``res[0]`` / ``res[1]`` and passed to
	            ``resize``.
	        val: fill value for the out-of-image area.

	    Returns:
	        The cropped patch resized to ``res``.
	    """
	    # Box corners in image coordinates: upper left, then bottom right
	    top_left = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
	    bottom_right = np.array(
	        transform([res[0] + 1, res[1] + 1], center, scale, res, invert=1)) - 1

	    canvas_shape = [bottom_right[1] - top_left[1],
	                    bottom_right[0] - top_left[0]]
	    if len(img.shape) > 2:
	        canvas_shape += [img.shape[2]]
	    canvas = np.ones(canvas_shape) * val

	    img_h, img_w = len(img), len(img[0])
	    # Destination window inside the canvas
	    dst_x0 = max(0, -top_left[0])
	    dst_x1 = min(bottom_right[0], img_w) - top_left[0]
	    dst_y0 = max(0, -top_left[1])
	    dst_y1 = min(bottom_right[1], img_h) - top_left[1]
	    # Source window inside the original image
	    src_x0, src_x1 = max(0, top_left[0]), min(img_w, bottom_right[0])
	    src_y0, src_y1 = max(0, top_left[1]), min(img_h, bottom_right[1])

	    canvas[dst_y0:dst_y1, dst_x0:dst_x1] = img[src_y0:src_y1, src_x0:src_x1]
	    return resize(canvas, res)


	def boxes_2_cs(boxes):
	    """Convert a batch of corner boxes to (center, scale) form.

	    Args:
	        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.

	    Returns:
	        ``(centers, scales)``: per-box ``[cx, cy]`` rows, and the longer box
	        side divided by 200.
	    """
	    left = boxes[:, 0]
	    top = boxes[:, 1]
	    width = boxes[:, 2] - left
	    height = boxes[:, 3] - top
	    mid_x = left + width / 2
	    mid_y = top + height / 2
	    # Longer of the two sides, per box
	    longest = np.stack([width, height]).max(axis=0)

	    return np.stack([mid_x, mid_y], axis=1), longest / 200


	def box_2_cs(box):
	    """Convert one corner box to (center, scale) form.

	    Args:
	        box: object supporting ``box[:4].int().tolist()``; the first four
	            entries are ``x1, y1, x2, y2`` and are truncated to integers.

	    Returns:
	        ``(center, scale)``: ``[cx, cy]`` as a list, and the longer box side
	        divided by 200.
	    """
	    left, top, right, bottom = box[:4].int().tolist()
	    width = right - left
	    height = bottom - top
	    center = [left + width / 2, top + height / 2]
	    scale = max(width, height) / 200
	    return center, scale


	def est_intrinsics(img_shape):
	    """Estimate camera intrinsics from the image size alone.

	    Args:
	        img_shape: ``(h, w, c)`` triple.

	    Returns:
	        ``(img_center, img_focal)``: a float tensor ``[w / 2, h / 2]`` and a
	        scalar float tensor holding the image diagonal
	        ``sqrt(h ** 2 + w ** 2)``.
	    """
	    h, w, _ = img_shape
	    img_center = torch.tensor([w / 2., h / 2.]).float()
	    # The focal length is approximated by the image diagonal in pixels
	    img_focal = torch.tensor(np.sqrt(h ** 2 + w ** 2)).float()
	    return img_center, img_focal