|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import math |
|
|
|
|
|
import numpy as np |
|
|
import cv2 |
|
|
|
|
|
|
|
|
def gaussian_blur(heatmaps: np.ndarray, kernel: int = 11) -> np.ndarray:
    """Modulate heatmap distribution with Gaussian.

    Each channel is zero-padded, blurred with a square Gaussian kernel and
    rescaled so that its maximum equals the maximum it had before blurring.
    The input array is modified in place and also returned.

    Note:
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[K, H, W]): model predicted heatmaps.
        kernel (int): Odd Gaussian kernel size for modulation, which should
            match the heatmap gaussian sigma when training.
            kernel=17 for sigma=3 and kernel=11 for sigma=2.

    Returns:
        np.ndarray ([K, H, W]): Modulated heatmap distribution (the same
        array object as ``heatmaps``).

    Raises:
        AssertionError: If ``kernel`` is even.
    """
    assert kernel % 2 == 1

    border = (kernel - 1) // 2
    K, H, W = heatmaps.shape

    # Use explicit end indices: ``border:-border`` degenerates to an empty
    # slice when border == 0 (kernel == 1).
    rows = slice(border, border + H)
    cols = slice(border, border + W)

    for k in range(K):
        origin_max = np.max(heatmaps[k])
        padded = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32)
        padded[rows, cols] = heatmaps[k]
        padded = cv2.GaussianBlur(padded, (kernel, kernel), 0)
        heatmaps[k] = padded[rows, cols]

        # Restore the original peak value. Skip the rescale when the blurred
        # peak is zero (e.g. an all-zero channel) to avoid 0/0 -> NaN.
        blurred_max = np.max(heatmaps[k])
        if blurred_max != 0:
            heatmaps[k] *= origin_max / blurred_max
    return heatmaps
|
|
|
|
|
|
|
|
def get_heatmap_maximum(heatmaps: np.ndarray) -> tuple:
    """Get maximum response location and value from heatmaps.

    Note:
        batch_size: B
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray): Heatmaps in shape (K, H, W) or (B, K, H, W)

    Returns:
        tuple:
        - locs (np.ndarray): locations (x, y) of maximum heatmap responses
            in shape (K, 2) or (B, K, 2), as float32. Entries whose maximum
            response is <= 0 are set to -1.
        - vals (np.ndarray): values of maximum heatmap responses in shape
            (K,) or (B, K)

    Raises:
        AssertionError: If ``heatmaps`` is not an ndarray or is not 3-D/4-D.
    """
    assert isinstance(heatmaps,
                      np.ndarray), ('heatmaps should be numpy.ndarray')
    assert heatmaps.ndim == 3 or heatmaps.ndim == 4, (
        f'Invalid shape {heatmaps.shape}')

    H, W = heatmaps.shape[-2:]
    # (K,) for a single sample, (B, K) for a batch.
    lead_shape = heatmaps.shape[:-2]
    num_maps = 1
    for dim in lead_shape:
        num_maps *= dim

    # Spell out both dimensions: ``-1`` cannot be inferred for zero-size
    # inputs such as an empty batch.
    heatmaps_flatten = heatmaps.reshape(num_maps, H * W)

    y_locs, x_locs = np.unravel_index(
        np.argmax(heatmaps_flatten, axis=1), shape=(H, W))
    locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)
    vals = np.amax(heatmaps_flatten, axis=1)
    # Non-positive peaks are treated as "no detection".
    locs[vals <= 0.] = -1

    # Restore the leading layout; this also handles B == 0 correctly, which
    # a truthiness test on B would mistake for the unbatched case.
    locs = locs.reshape(lead_shape + (2, ))
    vals = vals.reshape(lead_shape)

    return locs, vals
|
|
|
|
|
|
|
|
def refine_keypoints_dark_udp(keypoints: np.ndarray, heatmaps: np.ndarray,
                              blur_kernel_size: int) -> np.ndarray:
    """Refine keypoint predictions using distribution aware coordinate decoding
    for UDP. See `UDP`_ for details. The operation is in-place.

    The heatmaps are Gaussian-blurred, clipped to [1e-3, 50] and converted to
    log space (all in place). For every keypoint, finite differences around
    its current location give a gradient and a 2x2 Hessian of the log-heatmap,
    and the keypoint is moved by one Newton step (``-H^-1 @ g``).

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - heatmap size: [W, H]

    Args:
        keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D)
        heatmaps (np.ndarray): The heatmaps in shape (K, H, W)
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation

    Returns:
        np.ndarray: Refine keypoint coordinates in shape (N, K, D)

    .. _`UDP`: https://arxiv.org/abs/1911.07524
    """
    num_instances, num_kpts = keypoints.shape[:2]
    height, width = heatmaps.shape[1:]

    # Smooth, then move to log space where a Gaussian peak is quadratic.
    heatmaps = gaussian_blur(heatmaps, blur_kernel_size)
    np.clip(heatmaps, 1e-3, 50., out=heatmaps)
    np.log(heatmaps, out=heatmaps)

    # One-pixel edge padding so that neighbours of border pixels exist; the
    # padded maps are flattened and addressed with linear indices.
    row_stride = width + 2
    flat_pad = np.pad(
        heatmaps, ((0, 0), (1, 1), (1, 1)), mode='edge').reshape(-1)
    # Offset of each keypoint's padded map inside the flat buffer.
    map_offsets = row_stride * (height + 2) * np.arange(0, num_kpts)
    regulariser = np.finfo(np.float32).eps * np.eye(2)

    for inst in range(num_instances):
        # Linear index of each keypoint's pixel in the padded, flat buffer.
        centre = keypoints[inst, :, 0] + 1 + (keypoints[inst, :, 1] +
                                              1) * (width + 2)
        centre += map_offsets
        centre = centre.astype(int).reshape(-1, 1)

        # Samples at the centre and its neighbours (p = +1, m = -1).
        v_c = flat_pad[centre]
        v_xp = flat_pad[centre + 1]
        v_yp = flat_pad[centre + row_stride]
        v_xp_yp = flat_pad[centre + row_stride + 1]
        v_xm_ym = flat_pad[centre - row_stride - 1]
        v_xm = flat_pad[centre - 1]
        v_ym = flat_pad[centre - row_stride]

        # First derivatives (central differences), shaped (K, 2, 1).
        grad_x = 0.5 * (v_xp - v_xm)
        grad_y = 0.5 * (v_yp - v_ym)
        gradient = np.stack([grad_x, grad_y], axis=1)

        # Second derivatives, assembled into a (K, 2, 2) Hessian.
        h_xx = v_xp - 2 * v_c + v_xm
        h_yy = v_yp - 2 * v_c + v_ym
        h_xy = 0.5 * (
            v_xp_yp - v_xp - v_yp + v_c + v_c - v_xm - v_ym + v_xm_ym)
        hess = np.hstack([h_xx, h_xy, h_xy, h_yy]).reshape(num_kpts, 2, 2)
        # A tiny diagonal term keeps the matrix invertible on flat regions.
        hess_inv = np.linalg.inv(hess + regulariser)

        step = np.einsum('imn,ink->imk', hess_inv, gradient)
        keypoints[inst] -= step.squeeze()

    return keypoints
|
|
|
|
|
|
|
|
def udp_decode(heatmaps, input_size, heatmap_size, blur_kernel_size=11) -> tuple:
    """UDP decoding for keypoint location refinement.

    The per-keypoint heatmap maxima are located, refined with
    ``refine_keypoints_dark_udp`` and rescaled from heatmap coordinates to
    input-image coordinates.

    Note:
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[K, H, W]): model predicted heatmaps. Left
            unmodified by this function.
        input_size (sequence or np.ndarray): Model input size as
            (width, height); the normalised keypoints are multiplied by it.
        heatmap_size (sequence): Heatmap size as (W, H).
        blur_kernel_size (int): Gaussian kernel size for modulation, which
            should match the heatmap gaussian sigma when training.
            17 for sigma=3 and 11 for sigma=2.

    Returns:
        tuple:
        - keypoints (np.ndarray): Refined keypoint locations in input-image
            coordinates, in shape (1, K, 2).
        - scores (np.ndarray): Maximum heatmap responses in shape (1, K).
    """
    keypoints, scores = get_heatmap_maximum(heatmaps)

    # Add the instance dimension expected by the refinement step.
    keypoints = keypoints[None]
    scores = scores[None]

    # The refinement blurs, clips and log-transforms its heatmap argument in
    # place, so hand it a copy to keep the caller's heatmaps intact.
    keypoints = refine_keypoints_dark_udp(
        keypoints, heatmaps.copy(), blur_kernel_size=blur_kernel_size)

    # UDP maps heatmap coordinate range [0, W-1] x [0, H-1] onto the input.
    W, H = heatmap_size
    keypoints = (keypoints / [W - 1, H - 1]) * input_size
    return keypoints, scores
|
|
|
|
|
|
|
|
def get_udp_warp_matrix(
    center: np.ndarray,
    scale: np.ndarray,
    rot: float,
    output_size,
) -> np.ndarray:
    """Calculate the affine transformation matrix under the unbiased
    constraint. See `UDP (CVPR 2020)`_ for details.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y). Lists
            and tuples are accepted as well.
        scale (np.ndarray[2, ]): Scale of the bounding box
            wrt [width, height].
        rot (float): Rotation angle (degree).
        output_size (tuple): Size ([w, h]) of the output image

    Returns:
        np.ndarray: A 2x3 transformation matrix (float32)

    Raises:
        AssertionError: If ``center``, ``scale`` or ``output_size`` does not
            have exactly two entries.

    .. _`UDP (CVPR 2020)`: https://arxiv.org/abs/1911.07524
    """
    assert len(center) == 2
    assert len(scale) == 2
    assert len(output_size) == 2

    # Convert first: ``list * 2`` would repeat the sequence instead of
    # doubling its values, silently yielding a wrong translation.
    input_size = np.asarray(center) * 2
    rot_rad = np.deg2rad(rot)
    cos_r = math.cos(rot_rad)
    sin_r = math.sin(rot_rad)

    # Unbiased scaling maps a span of `scale` pixels onto `output_size - 1`.
    scale_x = (output_size[0] - 1) / scale[0]
    scale_y = (output_size[1] - 1) / scale[1]

    warp_mat = np.zeros((2, 3), dtype=np.float32)
    warp_mat[0, 0] = cos_r * scale_x
    warp_mat[0, 1] = -sin_r * scale_x
    warp_mat[0, 2] = scale_x * (-0.5 * input_size[0] * cos_r +
                                0.5 * input_size[1] * sin_r +
                                0.5 * scale[0])
    warp_mat[1, 0] = sin_r * scale_y
    warp_mat[1, 1] = cos_r * scale_y
    warp_mat[1, 2] = scale_y * (-0.5 * input_size[0] * sin_r -
                                0.5 * input_size[1] * cos_r +
                                0.5 * scale[1])
    return warp_mat
|
|
|
|
|
|
|
|
def top_down_affine_transform(img, bbox, padding=1.25):
    """Warp the bounding-box region of an image onto the full image canvas.

    The box is converted to a center and a padded scale, the scale is grown
    along one axis so it matches the aspect ratio of ``img``, and the image
    is warped with the UDP affine matrix for that center/scale. The output
    has the same width and height as the input image.

    Args:
        img (np.ndarray): Image to be transformed.
        bbox (np.ndarray): Bounding box as (x1, y1, x2, y2), in shape (4, )
            or (N, 4).
        padding (float): Multiplicative factor applied to the box width and
            height.

    Returns:
        tuple:
        - img (np.ndarray): Transformed image.
        - center (list): Single-element list holding the box center (x, y).
        - scale (list): Single-element list holding the aspect-ratio-fixed
            box scale (width, height).
    """
    is_single_box = bbox.ndim == 1
    boxes = bbox[None, :] if is_single_box else bbox

    # Column slices keep a trailing axis so the pieces can be re-stacked.
    left, top = boxes[:, :1], boxes[:, 1:2]
    right, bottom = boxes[:, 2:3], boxes[:, 3:]
    center = np.hstack([left + right, top + bottom]) * 0.5
    scale = np.hstack([right - left, bottom - top]) * padding

    if is_single_box:
        center, scale = center[0], scale[0]

    img_h, img_w = img.shape[:2]
    aspect_ratio = img_w / img_h

    # Enlarge the shorter side so the box has the image's aspect ratio.
    box_w, box_h = scale[..., :1], scale[..., 1:]
    too_wide = box_w > box_h * aspect_ratio
    fit_to_width = np.hstack([box_w, box_w / aspect_ratio])
    fit_to_height = np.hstack([box_h * aspect_ratio, box_h])
    scale = np.where(too_wide, fit_to_width, fit_to_height)

    # No rotation is applied.
    warp_mat = get_udp_warp_matrix(
        center, scale, 0., output_size=(img_w, img_h))

    warped = cv2.warpAffine(
        img, warp_mat, (int(img_w), int(img_h)), flags=cv2.INTER_LINEAR)

    return warped, [center], [scale]
|
|
|
|
|
|
|
|
def nms(dets: np.ndarray, thr: float):
    """Greedy non-maximum suppression.

    Boxes are visited in order of decreasing score. Each visited box is
    kept, and every remaining box whose IoU with it exceeds ``thr`` is
    discarded. Box extents are treated as inclusive pixel coordinates
    (width = x2 - x1 + 1).

    Args:
        dets (np.ndarray): [[x1, y1, x2, y2, score]].
        thr (float): IoU threshold; boxes with overlap <= thr are retained.

    Returns:
        list: Indexes to keep, ordered by decreasing score.
    """
    if len(dets) == 0:
        return []

    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    box_area = (right - left + 1) * (bottom - top + 1)

    # Candidate indices, best score first.
    remaining = conf.argsort()[::-1]
    selected = []
    while len(remaining) > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection rectangle of the best box with every other candidate.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        overlap = inter_w * inter_h
        iou = overlap / (box_area[best] + box_area[rest] - overlap)

        # Only candidates that do not overlap too much survive this round.
        remaining = rest[iou <= thr]

    return selected
|
|
|