# Uploaded via huggingface_hub (revision 789eef1)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import math
import numpy as np
import cv2
def gaussian_blur(heatmaps: np.ndarray, kernel: int = 11) -> np.ndarray:
    """Modulate heatmap distribution with Gaussian.

    The operation is in-place: ``heatmaps`` is modified and also returned.

    Note:
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[K, H, W]): model predicted heatmaps.
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.

    Returns:
        np.ndarray ([K, H, W]): Modulated heatmap distribution.
    """
    assert kernel % 2 == 1
    border = (kernel - 1) // 2
    K, H, W = heatmaps.shape
    for k in range(K):
        origin_max = np.max(heatmaps[k])
        # Zero-pad before blurring so responses near the border are not
        # attenuated by the kernel running off the edge; crop back afterwards.
        dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32)
        dr[border:-border, border:-border] = heatmaps[k].copy()
        dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
        heatmaps[k] = dr[border:-border, border:-border].copy()
        # Rescale so the peak value is preserved after smoothing.
        # Bug fix: guard against an all-zero channel, where the original
        # unconditionally divided by zero and filled the heatmap with NaNs.
        blurred_max = np.max(heatmaps[k])
        if blurred_max > 0:
            heatmaps[k] *= origin_max / blurred_max
    return heatmaps
def get_heatmap_maximum(heatmaps: np.ndarray):
    """Get maximum response location and value from heatmaps.

    Note:
        batch_size: B
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray): Heatmaps in shape (K, H, W) or (B, K, H, W)

    Returns:
        tuple:
        - locs (np.ndarray): locations of maximum heatmap responses in shape
            (K, 2) or (B, K, 2)
        - vals (np.ndarray): values of maximum heatmap responses in shape
            (K,) or (B, K)
    """
    assert isinstance(heatmaps,
                      np.ndarray), ('heatmaps should be numpy.ndarray')
    assert heatmaps.ndim == 3 or heatmaps.ndim == 4, (
        f'Invalid shape {heatmaps.shape}')

    if heatmaps.ndim == 3:
        K, H, W = heatmaps.shape
        B = None
        heatmaps_flatten = heatmaps.reshape(K, -1)
    else:
        B, K, H, W = heatmaps.shape
        heatmaps_flatten = heatmaps.reshape(B * K, -1)

    y_locs, x_locs = np.unravel_index(
        np.argmax(heatmaps_flatten, axis=1), shape=(H, W))
    locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)
    vals = np.amax(heatmaps_flatten, axis=1)
    # Non-positive peaks are treated as "no detection": mark location invalid.
    locs[vals <= 0.] = -1

    # Bug fix: the original tested truthiness (`if B:`), which silently
    # skipped the batched reshape for a zero-sized batch (B == 0); test for
    # None explicitly to distinguish "no batch dim" from "empty batch".
    if B is not None:
        locs = locs.reshape(B, K, 2)
        vals = vals.reshape(B, K)
    return locs, vals
def refine_keypoints_dark_udp(keypoints: np.ndarray, heatmaps: np.ndarray,
                              blur_kernel_size: int) -> np.ndarray:
    """Refine keypoint predictions using distribution aware coordinate decoding
    for UDP. See `UDP`_ for details. The operation is in-place.

    Note:
        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - heatmap size: [W, H]

    Args:
        keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D)
        heatmaps (np.ndarray): The heatmaps in shape (K, H, W)
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation

    Returns:
        np.ndarray: Refine keypoint coordinates in shape (N, K, D)

    .. _`UDP`: https://arxiv.org/abs/1911.07524
    """
    N, K = keypoints.shape[:2]
    H, W = heatmaps.shape[1:]
    # modulate heatmaps: smooth, then move to log space so a local quadratic
    # (Taylor) approximation around the peak is well-behaved.
    # NOTE: heatmaps is modified in place by all three calls below.
    heatmaps = gaussian_blur(heatmaps, blur_kernel_size)
    np.clip(heatmaps, 1e-3, 50., heatmaps)  # clamp away from 0 before log
    np.log(heatmaps, heatmaps)  # in-place log
    # Pad one pixel on each spatial side (edge-replicated) so the finite
    # differences below never index out of bounds, then flatten for
    # vectorized gathering.
    heatmaps_pad = np.pad(
        heatmaps, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten()

    for n in range(N):
        # Linear index of each keypoint in the padded (K, H+2, W+2) volume;
        # the "+1" offsets account for the 1-pixel padding border.
        index = keypoints[n, :, 0] + 1 + (keypoints[n, :, 1] + 1) * (W + 2)
        index += (W + 2) * (H + 2) * np.arange(0, K)  # per-keypoint channel offset
        index = index.astype(int).reshape(-1, 1)
        # Gather neighborhood values (offsets are in flattened padded coords,
        # row stride is W + 2):
        i_ = heatmaps_pad[index]                # center
        ix1 = heatmaps_pad[index + 1]           # right
        iy1 = heatmaps_pad[index + W + 2]       # down
        ix1y1 = heatmaps_pad[index + W + 3]     # down-right
        ix1_y1_ = heatmaps_pad[index - W - 3]   # up-left
        ix1_ = heatmaps_pad[index - 1]          # left
        iy1_ = heatmaps_pad[index - 2 - W]      # up
        # Gradient by central differences.
        dx = 0.5 * (ix1 - ix1_)
        dy = 0.5 * (iy1 - iy1_)
        derivative = np.concatenate([dx, dy], axis=1)
        derivative = derivative.reshape(K, 2, 1)
        # Hessian entries by second-order finite differences.
        dxx = ix1 - 2 * i_ + ix1_
        dyy = iy1 - 2 * i_ + iy1_
        dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
        hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
        hessian = hessian.reshape(K, 2, 2)
        # Newton step: x <- x - H^{-1} g; eps * I keeps the inverse defined
        # for (near-)singular Hessians.
        hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
        keypoints[n] -= np.einsum('imn,ink->imk', hessian,
                                  derivative).squeeze()

    return keypoints
def udp_decode(heatmaps, input_size, heatmap_size, blur_kernel_size=11) -> tuple:
    """UDP decoding for keypoint location refinement.

    Note:
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[K, H, W]): model predicted heatmaps.
        input_size (tuple): size (w, h) of the model input; refined
            coordinates are rescaled into this space.
        heatmap_size (tuple): size (W, H) of the heatmaps.
        blur_kernel_size (int): Gaussian kernel size (K) for modulation, which
            should match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.

    Returns:
        tuple:
        - keypoints (np.ndarray[1, K, 2]): refined keypoint coordinates in
          input-image space.
        - scores (np.ndarray[1, K]): heatmap peak values per keypoint.
    """
    # Bug fix (annotation only): the function returns a (keypoints, scores)
    # tuple, not a bare np.ndarray as previously annotated.
    keypoints, scores = get_heatmap_maximum(heatmaps)
    # unsqueeze the instance dimension for single-instance results
    keypoints = keypoints[None]
    scores = scores[None]
    keypoints = refine_keypoints_dark_udp(
        keypoints, heatmaps, blur_kernel_size=blur_kernel_size)
    # UDP unbiased mapping: the heatmap grid spans [0, W-1] x [0, H-1];
    # normalize by (size - 1) before scaling up to the input size.
    W, H = heatmap_size
    keypoints = (keypoints / [W - 1, H - 1]) * input_size
    return keypoints, scores
def get_udp_warp_matrix(
    center: np.ndarray,
    scale: np.ndarray,
    rot: float,
    output_size,
) -> np.ndarray:
    """Build the 2x3 affine warp matrix under the unbiased (UDP) constraint.

    See `UDP (CVPR 2020)`_ for details.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        scale (np.ndarray[2, ]): Scale of the bounding box
            wrt [width, height].
        rot (float): Rotation angle (degree).
        output_size (tuple): Size ([w, h]) of the output image

    Returns:
        np.ndarray: A 2x3 transformation matrix

    .. _`UDP (CVPR 2020)`: https://arxiv.org/abs/1911.07524
    """
    assert len(center) == 2
    assert len(scale) == 2
    assert len(output_size) == 2

    # The source image is treated as spanning [0, 2 * center] on each axis.
    input_size = center * 2
    theta = np.deg2rad(rot)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)

    # Unbiased scaling: N output pixels cover the box with N - 1 intervals.
    sx = (output_size[0] - 1) / scale[0]
    sy = (output_size[1] - 1) / scale[1]

    mat = np.zeros((2, 3), dtype=np.float32)
    # Rotation + scale part.
    mat[0, 0] = cos_t * sx
    mat[0, 1] = -sin_t * sx
    mat[1, 0] = sin_t * sy
    mat[1, 1] = cos_t * sy
    # Translation part: rotate the image center to the origin, then shift
    # by half the box size so the box center lands at the output center.
    mat[0, 2] = sx * (-0.5 * input_size[0] * cos_t +
                      0.5 * input_size[1] * sin_t +
                      0.5 * scale[0])
    mat[1, 2] = sy * (-0.5 * input_size[0] * sin_t -
                      0.5 * input_size[1] * cos_t +
                      0.5 * scale[1])
    return mat
def top_down_affine_transform(img, bbox, padding=1.25):
    """Warp the region inside ``bbox`` so it fills the whole output image.

    Args:
        img (np.ndarray): Image to be transformed.
        bbox (np.ndarray): Bounding box (x1, y1, x2, y2), shape (4,) or (N, 4).
        padding (float): Scale padding factor applied to the box size.

    Returns:
        np.ndarray: Transformed image.
        list: Box center(s) used for the warp.
        list: Box scale(s) used for the warp.
    """
    dim = bbox.ndim
    if dim == 1:
        bbox = bbox[None, :]

    # xyxy -> center + (width, height) scale, with padding applied.
    x1, y1, x2, y2 = np.hsplit(bbox, [1, 2, 3])
    center = np.hstack([x1 + x2, y1 + y2]) * 0.5
    scale = np.hstack([x2 - x1, y2 - y1]) * padding
    if dim == 1:
        # Collapse back to 1-D for the single-box case; the np.where below
        # broadcasts correctly for both the 1-D and 2-D shapes.
        center = center[0]
        scale = scale[0]

    h, w = img.shape[:2]
    warp_size = (int(w), int(h))
    aspect_ratio = w / h

    # reshape bbox to fixed aspect ratio: grow the shorter side so the crop
    # matches the output aspect ratio instead of distorting the content.
    box_w, box_h = np.hsplit(scale, [1])
    scale = np.where(box_w > box_h * aspect_ratio,
                     np.hstack([box_w, box_w / aspect_ratio]),
                     np.hstack([box_h * aspect_ratio, box_h]))

    rot = 0.  # no rotation at inference time
    warp_mat = get_udp_warp_matrix(
        center, scale, rot, output_size=(w, h))

    img = cv2.warpAffine(
        img, warp_mat, warp_size, flags=cv2.INTER_LINEAR)

    return img, [center], [scale]
def nms(dets: np.ndarray, thr: float):
    """Greedily select boxes with high confidence and overlap <= thr.

    Args:
        dets (np.ndarray): [[x1, y1, x2, y2, score]].
        thr (float): Retain overlap < thr.

    Returns:
        list: Indexes to keep.
    """
    if len(dets) == 0:
        return []

    coords_x1, coords_y1 = dets[:, 0], dets[:, 1]
    coords_x2, coords_y2 = dets[:, 2], dets[:, 3]
    confidences = dets[:, 4]

    # Box areas using the legacy "+1" pixel convention.
    box_areas = (coords_x2 - coords_x1 + 1) * (coords_y2 - coords_y1 + 1)
    # Candidate indices, best score first.
    remaining = confidences.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best = remaining[0]
        selected.append(best)
        rest = remaining[1:]

        # Intersection of the best box with every remaining candidate.
        inter_x1 = np.maximum(coords_x1[best], coords_x1[rest])
        inter_y1 = np.maximum(coords_y1[best], coords_y1[rest])
        inter_x2 = np.minimum(coords_x2[best], coords_x2[rest])
        inter_y2 = np.minimum(coords_y2[best], coords_y2[rest])
        inter = (np.maximum(0.0, inter_x2 - inter_x1 + 1) *
                 np.maximum(0.0, inter_y2 - inter_y1 + 1))

        # IoU against the best box; keep only candidates with overlap <= thr.
        iou = inter / (box_areas[best] + box_areas[rest] - inter)
        remaining = rest[iou <= thr]

    return selected