import cv2
import torch
import random
import numpy as np
from lib.core.config import cfg
from lib.utils.human_models import mano
def get_aug_config_contact():
# Augmentation intensity factors
scale_factor = 0.25
rot_factor = 30
color_factor = 0.2
trans_factor = 0.1 # Translation range (recommended 0.1 to 0.2)
noise_std = 0.02 # Gaussian noise strength
motion_blur_prob = 0.15 # Probability of applying motion blur
extreme_crop_prob = 0.1 # Probability for extreme cropping
extreme_crop_lvl = 0.3 # Crop intensity (recommended 0.2 to 0.4)
low_res_prob = 0.05 # Probability for applying low resolution
low_res_scale_range = (0.15, 0.5) # Range for low-res scaling
# Scaling augmentation
scale = np.clip(np.random.randn(), -1.0, 1.0) * scale_factor + 1.0
# Rotation augmentation
rot = np.clip(np.random.randn(), -2.0, 2.0) * rot_factor if random.random() <= 0.6 else 0
# Color augmentation
c_up = 1.0 + color_factor
c_low = 1.0 - color_factor
color_scale = np.array([
random.uniform(c_low, c_up),
random.uniform(c_low, c_up),
random.uniform(c_low, c_up)
])
# Flipping augmentation
do_flip = random.random() <= 0.5
# Translation augmentation
tx = np.clip(np.random.randn(), -1.0, 1.0) * trans_factor
ty = np.clip(np.random.randn(), -1.0, 1.0) * trans_factor
# Extreme cropping augmentation
do_extreme_crop = random.random() <= extreme_crop_prob
# Noise augmentation (returns standard deviation for Gaussian noise injection)
add_noise = random.random() <= 0.3 # 30% chance of adding noise
noise_std = noise_std if add_noise else 0.0
# Motion blur augmentation
apply_motion_blur = random.random() <= motion_blur_prob
motion_blur_kernel_size = random.choice([3, 5, 7]) if apply_motion_blur else 0
# Low-resolution augmentation
apply_low_res = random.random() <= low_res_prob
low_res_scale = random.uniform(*low_res_scale_range) if apply_low_res else 1.0
return {
'scale': scale,
'rot': rot,
'color_scale': color_scale,
'do_flip': do_flip,
'tx': tx,
'ty': ty,
'do_extreme_crop': do_extreme_crop,
'extreme_crop_lvl': extreme_crop_lvl if do_extreme_crop else 0,
'noise_std': noise_std,
'motion_blur_kernel_size': motion_blur_kernel_size,
        'low_res_scale': low_res_scale  # 1.0 means no low-res degradation applied
}
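# Minimal usage sketch for get_aug_config_contact (illustrative only; all keys
# printed below come straight from the dict returned above):
def _demo_aug_config():
    params = get_aug_config_contact()
    print({k: params[k] for k in ('scale', 'rot', 'do_flip', 'noise_std')})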
def rotate_2d(pt_2d, rot_rad):
x = pt_2d[0]
y = pt_2d[1]
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
xx = x * cs - y * sn
yy = x * sn + y * cs
return np.array([xx, yy], dtype=np.float32)
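# Worked example for rotate_2d: rotating the unit x-axis by pi/2 radians yields
# the unit y-axis, i.e. counter-clockwise rotation in a standard (x right, y up)
# frame; in image coordinates (y down) the same matrix acts clockwise.
def _demo_rotate_2d():
    print(rotate_2d(np.array([1.0, 0.0], dtype=np.float32), np.pi / 2))  # ~[0., 1.]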
def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False):
# augment size with scale
src_w = src_width * scale
src_h = src_height * scale
src_center = np.array([c_x, c_y], dtype=np.float32)
# augment rotation
rot_rad = np.pi * rot / 180
src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad)
src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad)
dst_w = dst_width
dst_h = dst_height
dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32)
dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32)
dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32)
src = np.zeros((3, 2), dtype=np.float32)
src[0, :] = src_center
src[1, :] = src_center + src_downdir
src[2, :] = src_center + src_rightdir
dst = np.zeros((3, 2), dtype=np.float32)
dst[0, :] = dst_center
dst[1, :] = dst_center + dst_downdir
dst[2, :] = dst_center + dst_rightdir
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
trans = trans.astype(np.float32)
return trans
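# Sketch: map a source-image point through the 2x3 affine returned by
# gen_trans_from_patch_cv and back through its inverse. The 200x200 box and
# 256x256 patch below are illustrative values, not values from this codebase.
def _demo_patch_transform():
    args = dict(c_x=320, c_y=240, src_width=200, src_height=200,
                dst_width=256, dst_height=256, scale=1.0, rot=0.0)
    trans = gen_trans_from_patch_cv(**args)
    inv_trans = gen_trans_from_patch_cv(**args, inv=True)
    pt_src = np.array([320.0, 240.0, 1.0], dtype=np.float32)  # homogeneous coords
    pt_dst = trans @ pt_src  # box center -> patch center (128, 128)
    print(pt_dst, inv_trans @ np.append(pt_dst, 1.0))  # round-trips to (320, 240)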
def generate_patch_image_contact(cvimg, bbox, scale, rot, do_flip, out_shape, tx=0.0, ty=0.0, bkg_color='black'):
img = cvimg.copy()
img_height, img_width, img_channels = img.shape
bb_c_x = float(bbox[0] + 0.5 * bbox[2])
bb_c_y = float(bbox[1] + 0.5 * bbox[3])
bb_width = float(bbox[2])
bb_height = float(bbox[3])
    borderMode = cv2.BORDER_CONSTANT
    borderValue = (255, 255, 255) if bkg_color == 'white' else (0, 0, 0)
if do_flip:
img = img[:, ::-1, :]
bb_c_x = img_width - bb_c_x - 1
# Add translation offset
bb_c_x += tx * img_width
bb_c_y += ty * img_height
trans = gen_trans_from_patch_cv(bb_c_x, bb_c_y, bb_width, bb_height,
out_shape[1], out_shape[0], scale, rot)
img_patch = cv2.warpAffine(img, trans, (int(out_shape[1]), int(out_shape[0])), flags=cv2.INTER_LINEAR, borderMode=borderMode, borderValue=borderValue)
img_patch = img_patch.astype(np.float32)
inv_trans = gen_trans_from_patch_cv(bb_c_x, bb_c_y, bb_width, bb_height,
out_shape[1], out_shape[0], scale, rot, inv=True)
return img_patch, trans, inv_trans
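# Sketch: crop a patch from a synthetic image. The random image, bbox, and
# 256x256 output shape are placeholders; in this codebase the output shape
# comes from cfg.MODEL.input_img_shape and the bbox from mask2bbox or a detector.
def _demo_generate_patch():
    img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
    bbox = (200, 100, 240, 240)  # (x_min, y_min, width, height)
    patch, trans, inv_trans = generate_patch_image_contact(
        img, bbox, scale=1.0, rot=0.0, do_flip=False, out_shape=(256, 256))
    print(patch.shape, trans.shape, inv_trans.shape)  # (256, 256, 3) (2, 3) (2, 3)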
def augmentation_contact(img, bbox, data_split, enforce_flip=None, bkg_color='black'):
if data_split == 'train':
aug_params = get_aug_config_contact()
else:
aug_params = {
'scale': 1.0,
'rot': 0.0,
'color_scale': np.array([1, 1, 1]),
'do_flip': False,
'tx': 0.0,
'ty': 0.0,
'do_extreme_crop': False,
'extreme_crop_lvl': 0.0,
'noise_std': 0.0,
'motion_blur_kernel_size': 0,
'low_res_scale': 1.0 # No low-res in non-training mode
}
# Enforce flip if specified
if enforce_flip is not None:
aug_params['do_flip'] = enforce_flip
# Apply geometric augmentations (scaling, rotation, flipping)
img, trans, inv_trans = generate_patch_image_contact(
img, bbox, aug_params['scale'], aug_params['rot'],
aug_params['do_flip'], cfg.MODEL.input_img_shape,
aug_params['tx'], aug_params['ty'], bkg_color
)
# Apply low-resolution augmentation
if aug_params['low_res_scale'] < 1.0: # Only apply if scaling down
img = apply_low_res(img, aug_params['low_res_scale'])
# Apply color augmentation
img = np.clip(img * aug_params['color_scale'][None, None, :], 0, 255)
# Apply extreme cropping
if aug_params['do_extreme_crop']:
img = apply_extreme_crop(img, aug_params['extreme_crop_lvl'])
# Apply noise augmentation
if aug_params['noise_std'] > 0:
img = add_gaussian_noise(img, aug_params['noise_std'])
# Apply motion blur augmentation
if aug_params['motion_blur_kernel_size'] > 0:
img = apply_motion_blur(img, aug_params['motion_blur_kernel_size'])
return img, trans, inv_trans, aug_params['rot'], aug_params['do_flip'], aug_params['color_scale']
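# Sketch: a full augmentation pass as a training dataloader would run it.
# cfg.MODEL.input_img_shape must already be configured, since it is read inside
# augmentation_contact; the image and bbox below are placeholders.
def _demo_augmentation():
    img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
    bbox = (200, 100, 240, 240)
    img_aug, trans, inv_trans, rot, do_flip, color_scale = augmentation_contact(
        img, bbox, data_split='train')
    print(img_aug.shape, rot, do_flip, color_scale)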
def apply_extreme_crop(img, crop_lvl):
"""Extreme cropping: Aggressively crop the image."""
h, w = img.shape[:2]
crop_size = max(1, int(min(h, w) * (1 - crop_lvl))) # Prevent zero-size crops
start_x = random.randint(0, max(0, w - crop_size))
start_y = random.randint(0, max(0, h - crop_size))
cropped_img = img[start_y:start_y + crop_size, start_x:start_x + crop_size]
    # Resize the square crop back to the original (w, h) patch size; note that
    # this stretches the crop (aspect ratio is not preserved unless w == h)
    return cv2.resize(cropped_img, (w, h), interpolation=cv2.INTER_LINEAR)
def add_gaussian_noise(img, noise_std):
    """Add Gaussian noise; noise_std is expressed as a fraction of the dynamic range."""
    noise = np.random.normal(0, noise_std, img.shape).astype(np.float32)
    if img.dtype == np.uint8:
        noisy_img = np.clip(img + noise * 255, 0, 255).astype(np.uint8)
    elif img.dtype in (np.float32, np.float64):
        # Float patches in this pipeline are kept in the [0, 255] range
        # (see augmentation_contact above), so scale and clip accordingly.
        noisy_img = np.clip(img + noise * 255, 0, 255).astype(img.dtype)
    else:
        raise TypeError("Unsupported image dtype. Expected uint8, float32, or float64.")
    return noisy_img
def apply_motion_blur(img, kernel_size):
"""Apply motion blur to the image with a random direction."""
kernel = np.zeros((kernel_size, kernel_size))
direction = random.choice(['horizontal', 'vertical', 'diagonal'])
if direction == 'horizontal':
kernel[(kernel_size - 1) // 2, :] = np.ones(kernel_size)
elif direction == 'vertical':
kernel[:, (kernel_size - 1) // 2] = np.ones(kernel_size)
elif direction == 'diagonal':
np.fill_diagonal(kernel, 1)
kernel /= kernel_size # Normalize the kernel
return cv2.filter2D(img, -1, kernel, borderType=cv2.BORDER_REFLECT)
def apply_low_res(img, scale_factor=0.25):
"""Simulate low-resolution effect by downsampling and upsampling."""
if not (0 < scale_factor < 1):
raise ValueError("scale_factor should be between 0 and 1.")
h, w = img.shape[:2]
# Calculate target dimensions for downsampling
downsampled_size = (max(1, int(w * scale_factor)), max(1, int(h * scale_factor)))
# Downsample using INTER_AREA for better quality in aggressive downsampling
low_res_img = cv2.resize(img, downsampled_size, interpolation=cv2.INTER_AREA)
# Upsample using INTER_NEAREST for strong pixelation effect
return cv2.resize(low_res_img, (w, h), interpolation=cv2.INTER_NEAREST).astype(img.dtype)
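# Sketch: chain the three photometric corruptions above on a float32 patch in
# the [0, 255] range that this pipeline uses; the parameter values mirror the
# defaults sampled in get_aug_config_contact.
def _demo_photometric_aug():
    patch = np.random.rand(256, 256, 3).astype(np.float32) * 255
    patch = add_gaussian_noise(patch, noise_std=0.02)
    patch = apply_motion_blur(patch, kernel_size=5)
    patch = apply_low_res(patch, scale_factor=0.25)
    print(patch.shape, patch.dtype, patch.min(), patch.max())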
def process_human_model_output_orig(human_model_param, cam_param):
    pose, shape, trans = human_model_param['pose'], human_model_param['shape'], human_model_param['trans']
    hand_type = human_model_param['hand_type']
    pose = torch.FloatTensor(pose).view(-1, 3)  # MANO pose parameters (48 dimensions: 16 joints x 3)
    shape = torch.FloatTensor(shape).view(1, -1)  # MANO shape parameters (10 dimensions)
    trans = torch.FloatTensor(trans).view(1, -1)  # translation vector
# apply camera extrinsic (rotation)
# merge root pose and camera rotation
if 'R' in cam_param:
R = np.array(cam_param['R'], dtype=np.float32).reshape(3,3)
root_pose = pose[mano.orig_root_joint_idx,:].numpy()
root_pose, _ = cv2.Rodrigues(root_pose)
root_pose, _ = cv2.Rodrigues(np.dot(R,root_pose))
pose[mano.orig_root_joint_idx] = torch.from_numpy(root_pose).view(3)
# get root joint coordinate
root_pose = pose[mano.orig_root_joint_idx].view(1,3)
hand_pose = torch.cat((pose[:mano.orig_root_joint_idx,:], pose[mano.orig_root_joint_idx+1:,:])).view(1,-1)
with torch.no_grad():
output = mano.layer[hand_type](betas=shape, hand_pose=hand_pose, global_orient=root_pose, transl=trans)
mesh_coord = output.vertices[0].numpy()
joint_coord = np.dot(mano.joint_regressor, mesh_coord)
    # apply camera extrinsic (translation)
    # compensate for rotation (the translation from origin to root joint was not canceled)
if 'R' in cam_param and 't' in cam_param:
R, t = np.array(cam_param['R'], dtype=np.float32).reshape(3,3), np.array(cam_param['t'], dtype=np.float32).reshape(1,3)
root_coord = joint_coord[mano.root_joint_idx,None,:]
joint_coord = joint_coord - root_coord + np.dot(R, root_coord.transpose(1,0)).transpose(1,0) + t
mesh_coord = mesh_coord - root_coord + np.dot(R, root_coord.transpose(1,0)).transpose(1,0) + t
joint_cam_orig = joint_coord.copy()
mesh_cam_orig = mesh_coord.copy()
    pose_orig = torch.cat((root_pose, hand_pose), dim=-1)[0].detach().cpu().numpy()
    shape_orig = shape[0].detach().cpu().numpy()
    trans_orig = trans[0].detach().cpu().numpy()
return mesh_cam_orig, joint_cam_orig, pose_orig, shape_orig, trans_orig
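# Sketch: decode a MANO annotation into camera-space mesh and joints. The zero
# parameters are placeholders, and keying mano.layer by 'right' assumes the
# layer-dict convention of lib.utils.human_models; 'R'/'t' follow the cam_param
# convention read above (3x3 rotation, 3-vector translation).
def _demo_process_mano():
    human_model_param = {
        'pose': np.zeros(48, dtype=np.float32),   # 16 joints x 3 axis-angle dims
        'shape': np.zeros(10, dtype=np.float32),  # MANO shape coefficients
        'trans': np.zeros(3, dtype=np.float32),
        'hand_type': 'right',
    }
    cam_param = {'R': np.eye(3, dtype=np.float32), 't': np.zeros(3, dtype=np.float32)}
    mesh, joints, pose, shape, trans = process_human_model_output_orig(human_model_param, cam_param)
    print(mesh.shape, joints.shape)  # (778, 3) MANO vertices and the regressed joints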
def mask2bbox(mask, expansion_factor=1.0):
    # Find non-zero elements (object pixels)
    coords = np.argwhere(mask)
    if coords.size == 0:
        raise ValueError("mask2bbox received an empty mask")
    # Extract bounding box coordinates
    y_min, x_min = coords.min(axis=0)
    y_max, x_max = coords.max(axis=0)
# Compute width and height
width = x_max - x_min + 1
height = y_max - y_min + 1
# Expand bounding box
if expansion_factor > 0:
x_min = max(0, int(x_min - width * expansion_factor / 2))
y_min = max(0, int(y_min - height * expansion_factor / 2))
x_max = min(mask.shape[1] - 1, int(x_max + width * expansion_factor / 2))
y_max = min(mask.shape[0] - 1, int(y_max + height * expansion_factor / 2))
# Recalculate width and height after expansion
width = x_max - x_min + 1
height = y_max - y_min + 1
return (x_min, y_min, width, height)
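# Sketch: recover an expanded bbox from a toy binary mask. With the default
# expansion_factor=1.0 the box grows by half its width/height on each side,
# clamped to the mask bounds.
def _demo_mask2bbox():
    mask = np.zeros((100, 100), dtype=np.uint8)
    mask[40:60, 30:70] = 1
    print(mask2bbox(mask))                        # expanded box
    print(mask2bbox(mask, expansion_factor=0.0))  # tight box: (30, 40, 40, 20)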