# Uploaded via huggingface_hub (revision 789eef1)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import math
import numpy as np
import cv2
def gaussian_blur(heatmaps: np.ndarray, kernel: int = 11) -> np.ndarray:
    """Modulate heatmap distribution with Gaussian.

    The operation is in-place: ``heatmaps`` is modified and also returned.

    Note:
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[K, H, W]): model predicted heatmaps.
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.

    Returns:
        np.ndarray ([K, H, W]): Modulated heatmap distribution.
    """
    assert kernel % 2 == 1
    border = (kernel - 1) // 2
    K, H, W = heatmaps.shape
    for k in range(K):
        origin_max = np.max(heatmaps[k])
        # Zero-pad before blurring so responses near the border are not
        # attenuated by the kernel running off the edge; crop back afterwards.
        dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32)
        dr[border:-border, border:-border] = heatmaps[k].copy()
        dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
        heatmaps[k] = dr[border:-border, border:-border].copy()
        # Rescale so the peak value is preserved after smoothing.
        # Bug fix: guard against an all-zero channel, where the original
        # unconditionally divided by zero and filled the heatmap with NaNs.
        blurred_max = np.max(heatmaps[k])
        if blurred_max > 0:
            heatmaps[k] *= origin_max / blurred_max
    return heatmaps
def get_heatmap_maximum(heatmaps: np.ndarray):
    """Get maximum response location and value from heatmaps.

    Note:
        batch_size: B
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray): Heatmaps in shape (K, H, W) or (B, K, H, W)

    Returns:
        tuple:
        - locs (np.ndarray): locations of maximum heatmap responses in shape
            (K, 2) or (B, K, 2)
        - vals (np.ndarray): values of maximum heatmap responses in shape
            (K,) or (B, K)
    """
    assert isinstance(heatmaps,
                      np.ndarray), ('heatmaps should be numpy.ndarray')
    assert heatmaps.ndim == 3 or heatmaps.ndim == 4, (
        f'Invalid shape {heatmaps.shape}')

    if heatmaps.ndim == 3:
        K, H, W = heatmaps.shape
        B = None
        heatmaps_flatten = heatmaps.reshape(K, -1)
    else:
        B, K, H, W = heatmaps.shape
        heatmaps_flatten = heatmaps.reshape(B * K, -1)

    y_locs, x_locs = np.unravel_index(
        np.argmax(heatmaps_flatten, axis=1), shape=(H, W))
    locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)
    vals = np.amax(heatmaps_flatten, axis=1)
    # Non-positive peaks are treated as "no detection": mark location invalid.
    locs[vals <= 0.] = -1

    # Bug fix: the original tested truthiness (`if B:`), which silently
    # skipped the batched reshape for a zero-sized batch (B == 0); test for
    # None explicitly to distinguish "no batch dim" from "empty batch".
    if B is not None:
        locs = locs.reshape(B, K, 2)
        vals = vals.reshape(B, K)
    return locs, vals
def refine_keypoints_dark_udp(keypoints: np.ndarray, heatmaps: np.ndarray,
                              blur_kernel_size: int) -> np.ndarray:
    """Refine keypoint predictions using distribution aware coordinate decoding
    for UDP. See `UDP`_ for details. The operation is in-place.

    Note:
        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - heatmap size: [W, H]

    Args:
        keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D)
        heatmaps (np.ndarray): The heatmaps in shape (K, H, W)
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation

    Returns:
        np.ndarray: Refine keypoint coordinates in shape (N, K, D)

    .. _`UDP`: https://arxiv.org/abs/1911.07524
    """
    N, K = keypoints.shape[:2]
    H, W = heatmaps.shape[1:]
    # modulate heatmaps: smooth, then move to log space so a local quadratic
    # (Taylor) approximation around the peak is well-behaved.
    # NOTE: heatmaps is modified in place by all three calls below.
    heatmaps = gaussian_blur(heatmaps, blur_kernel_size)
    np.clip(heatmaps, 1e-3, 50., heatmaps)  # clamp away from 0 before log
    np.log(heatmaps, heatmaps)  # in-place log
    # Pad one pixel on each spatial side (edge-replicated) so the finite
    # differences below never index out of bounds, then flatten for
    # vectorized gathering.
    heatmaps_pad = np.pad(
        heatmaps, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten()

    for n in range(N):
        # Linear index of each keypoint in the padded (K, H+2, W+2) volume;
        # the "+1" offsets account for the 1-pixel padding border.
        index = keypoints[n, :, 0] + 1 + (keypoints[n, :, 1] + 1) * (W + 2)
        index += (W + 2) * (H + 2) * np.arange(0, K)  # per-keypoint channel offset
        index = index.astype(int).reshape(-1, 1)
        # Gather neighborhood values (offsets are in flattened padded coords,
        # row stride is W + 2):
        i_ = heatmaps_pad[index]                # center
        ix1 = heatmaps_pad[index + 1]           # right
        iy1 = heatmaps_pad[index + W + 2]       # down
        ix1y1 = heatmaps_pad[index + W + 3]     # down-right
        ix1_y1_ = heatmaps_pad[index - W - 3]   # up-left
        ix1_ = heatmaps_pad[index - 1]          # left
        iy1_ = heatmaps_pad[index - 2 - W]      # up
        # Gradient by central differences.
        dx = 0.5 * (ix1 - ix1_)
        dy = 0.5 * (iy1 - iy1_)
        derivative = np.concatenate([dx, dy], axis=1)
        derivative = derivative.reshape(K, 2, 1)
        # Hessian entries by second-order finite differences.
        dxx = ix1 - 2 * i_ + ix1_
        dyy = iy1 - 2 * i_ + iy1_
        dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
        hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
        hessian = hessian.reshape(K, 2, 2)
        # Newton step: x <- x - H^{-1} g; eps * I keeps the inverse defined
        # for (near-)singular Hessians.
        hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
        keypoints[n] -= np.einsum('imn,ink->imk', hessian,
                                  derivative).squeeze()

    return keypoints
def udp_decode(heatmaps, input_size, heatmap_size, blur_kernel_size=11) -> tuple:
    """UDP decoding for keypoint location refinement.

    Note:
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[K, H, W]): model predicted heatmaps.
        input_size (tuple): size (w, h) of the model input; refined
            coordinates are rescaled into this space.
        heatmap_size (tuple): size (W, H) of the heatmaps.
        blur_kernel_size (int): Gaussian kernel size (K) for modulation, which
            should match the heatmap gaussian sigma when training.
            K=17 for sigma=3 and k=11 for sigma=2.

    Returns:
        tuple:
        - keypoints (np.ndarray[1, K, 2]): refined keypoint coordinates in
          input-image space.
        - scores (np.ndarray[1, K]): heatmap peak values per keypoint.
    """
    # Bug fix (annotation only): the function returns a (keypoints, scores)
    # tuple, not a bare np.ndarray as previously annotated.
    keypoints, scores = get_heatmap_maximum(heatmaps)
    # unsqueeze the instance dimension for single-instance results
    keypoints = keypoints[None]
    scores = scores[None]
    keypoints = refine_keypoints_dark_udp(
        keypoints, heatmaps, blur_kernel_size=blur_kernel_size)
    # UDP unbiased mapping: the heatmap grid spans [0, W-1] x [0, H-1];
    # normalize by (size - 1) before scaling up to the input size.
    W, H = heatmap_size
    keypoints = (keypoints / [W - 1, H - 1]) * input_size
    return keypoints, scores
def get_udp_warp_matrix(
    center: np.ndarray,
    scale: np.ndarray,
    rot: float,
    output_size,
) -> np.ndarray:
    """Build the 2x3 affine warp matrix under the unbiased (UDP) constraint.

    See `UDP (CVPR 2020)`_ for details.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        scale (np.ndarray[2, ]): Scale of the bounding box
            wrt [width, height].
        rot (float): Rotation angle (degree).
        output_size (tuple): Size ([w, h]) of the output image

    Returns:
        np.ndarray: A 2x3 transformation matrix

    .. _`UDP (CVPR 2020)`: https://arxiv.org/abs/1911.07524
    """
    assert len(center) == 2
    assert len(scale) == 2
    assert len(output_size) == 2

    # The source image is treated as spanning [0, 2 * center] on each axis.
    input_size = center * 2
    theta = np.deg2rad(rot)
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)

    # Unbiased scaling: N output pixels cover the box with N - 1 intervals.
    sx = (output_size[0] - 1) / scale[0]
    sy = (output_size[1] - 1) / scale[1]

    mat = np.zeros((2, 3), dtype=np.float32)
    # Rotation + scale part.
    mat[0, 0] = cos_t * sx
    mat[0, 1] = -sin_t * sx
    mat[1, 0] = sin_t * sy
    mat[1, 1] = cos_t * sy
    # Translation part: rotate the image center to the origin, then shift
    # by half the box size so the box center lands at the output center.
    mat[0, 2] = sx * (-0.5 * input_size[0] * cos_t +
                      0.5 * input_size[1] * sin_t +
                      0.5 * scale[0])
    mat[1, 2] = sy * (-0.5 * input_size[0] * sin_t -
                      0.5 * input_size[1] * cos_t +
                      0.5 * scale[1])
    return mat
def top_down_affine_transform(img, bbox, padding=1.25):
    """Warp the region inside ``bbox`` so it fills the whole output image.

    Args:
        img (np.ndarray): Image to be transformed.
        bbox (np.ndarray): Bounding box (x1, y1, x2, y2), shape (4,) or (N, 4).
        padding (float): Scale padding factor applied to the box size.

    Returns:
        np.ndarray: Transformed image.
        list: Box center(s) used for the warp.
        list: Box scale(s) used for the warp.
    """
    dim = bbox.ndim
    if dim == 1:
        bbox = bbox[None, :]

    # xyxy -> center + (width, height) scale, with padding applied.
    x1, y1, x2, y2 = np.hsplit(bbox, [1, 2, 3])
    center = np.hstack([x1 + x2, y1 + y2]) * 0.5
    scale = np.hstack([x2 - x1, y2 - y1]) * padding
    if dim == 1:
        # Collapse back to 1-D for the single-box case; the np.where below
        # broadcasts correctly for both the 1-D and 2-D shapes.
        center = center[0]
        scale = scale[0]

    h, w = img.shape[:2]
    warp_size = (int(w), int(h))
    aspect_ratio = w / h

    # reshape bbox to fixed aspect ratio: grow the shorter side so the crop
    # matches the output aspect ratio instead of distorting the content.
    box_w, box_h = np.hsplit(scale, [1])
    scale = np.where(box_w > box_h * aspect_ratio,
                     np.hstack([box_w, box_w / aspect_ratio]),
                     np.hstack([box_h * aspect_ratio, box_h]))

    rot = 0.  # no rotation at inference time
    warp_mat = get_udp_warp_matrix(
        center, scale, rot, output_size=(w, h))

    img = cv2.warpAffine(
        img, warp_mat, warp_size, flags=cv2.INTER_LINEAR)

    return img, [center], [scale]
def nms(dets: np.ndarray, thr: float):
    """Greedily select boxes with high confidence and overlap <= thr.

    Args:
        dets (np.ndarray): [[x1, y1, x2, y2, score]].
        thr (float): Retain overlap < thr.

    Returns:
        list: Indexes to keep.
    """
    if len(dets) == 0:
        return []

    coords_x1, coords_y1 = dets[:, 0], dets[:, 1]
    coords_x2, coords_y2 = dets[:, 2], dets[:, 3]
    confidences = dets[:, 4]

    # Box areas using the legacy "+1" pixel convention.
    box_areas = (coords_x2 - coords_x1 + 1) * (coords_y2 - coords_y1 + 1)
    # Candidate indices, best score first.
    remaining = confidences.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best = remaining[0]
        selected.append(best)
        rest = remaining[1:]

        # Intersection of the best box with every remaining candidate.
        inter_x1 = np.maximum(coords_x1[best], coords_x1[rest])
        inter_y1 = np.maximum(coords_y1[best], coords_y1[rest])
        inter_x2 = np.minimum(coords_x2[best], coords_x2[rest])
        inter_y2 = np.minimum(coords_y2[best], coords_y2[rest])
        inter = (np.maximum(0.0, inter_x2 - inter_x1 + 1) *
                 np.maximum(0.0, inter_y2 - inter_y1 + 1))

        # IoU against the best box; keep only candidates with overlap <= thr.
        iou = inter / (box_areas[best] + box_areas[rest] - inter)
        remaining = rest[iou <= thr]

    return selected