|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from itertools import product |
|
|
|
|
|
import numpy as np |
|
|
|
|
|
from .post_processing import gaussian_blur, gaussian_blur1d |
|
|
|
|
|
|
|
|
def refine_keypoints(keypoints: np.ndarray, |
|
|
heatmaps: np.ndarray) -> np.ndarray: |
|
|
"""Refine keypoint predictions by moving from the maximum towards the |
|
|
second maximum by 0.25 pixel. The operation is in-place. |
|
|
|
|
|
Note: |
|
|
|
|
|
- instance number: N |
|
|
- keypoint number: K |
|
|
- keypoint dimension: D |
|
|
- heatmap size: [W, H] |
|
|
|
|
|
Args: |
|
|
keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D) |
|
|
heatmaps (np.ndarray): The heatmaps in shape (K, H, W) |
|
|
|
|
|
Returns: |
|
|
np.ndarray: Refine keypoint coordinates in shape (N, K, D) |
|
|
""" |
|
|
N, K = keypoints.shape[:2] |
|
|
H, W = heatmaps.shape[1:] |
|
|
|
|
|
for n, k in product(range(N), range(K)): |
|
|
x, y = keypoints[n, k, :2].astype(int) |
|
|
|
|
|
if 1 < x < W - 1 and 0 < y < H: |
|
|
dx = heatmaps[k, y, x + 1] - heatmaps[k, y, x - 1] |
|
|
else: |
|
|
dx = 0. |
|
|
|
|
|
if 1 < y < H - 1 and 0 < x < W: |
|
|
dy = heatmaps[k, y + 1, x] - heatmaps[k, y - 1, x] |
|
|
else: |
|
|
dy = 0. |
|
|
|
|
|
keypoints[n, k] += np.sign([dx, dy], dtype=np.float32) * 0.25 |
|
|
|
|
|
return keypoints |
|
|
|
|
|
|
|
|
def refine_keypoints_dark(keypoints: np.ndarray, heatmaps: np.ndarray, |
|
|
blur_kernel_size: int) -> np.ndarray: |
|
|
"""Refine keypoint predictions using distribution aware coordinate |
|
|
decoding. See `Dark Pose`_ for details. The operation is in-place. |
|
|
|
|
|
Note: |
|
|
|
|
|
- instance number: N |
|
|
- keypoint number: K |
|
|
- keypoint dimension: D |
|
|
- heatmap size: [W, H] |
|
|
|
|
|
Args: |
|
|
keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D) |
|
|
heatmaps (np.ndarray): The heatmaps in shape (K, H, W) |
|
|
blur_kernel_size (int): The Gaussian blur kernel size of the heatmap |
|
|
modulation |
|
|
|
|
|
Returns: |
|
|
np.ndarray: Refine keypoint coordinates in shape (N, K, D) |
|
|
|
|
|
.. _`Dark Pose`: https://arxiv.org/abs/1910.06278 |
|
|
""" |
|
|
N, K = keypoints.shape[:2] |
|
|
H, W = heatmaps.shape[1:] |
|
|
|
|
|
|
|
|
heatmaps = gaussian_blur(heatmaps, blur_kernel_size) |
|
|
np.maximum(heatmaps, 1e-10, heatmaps) |
|
|
np.log(heatmaps, heatmaps) |
|
|
|
|
|
for n, k in product(range(N), range(K)): |
|
|
x, y = keypoints[n, k, :2].astype(int) |
|
|
if 1 < x < W - 2 and 1 < y < H - 2: |
|
|
dx = 0.5 * (heatmaps[k, y, x + 1] - heatmaps[k, y, x - 1]) |
|
|
dy = 0.5 * (heatmaps[k, y + 1, x] - heatmaps[k, y - 1, x]) |
|
|
|
|
|
dxx = 0.25 * ( |
|
|
heatmaps[k, y, x + 2] - 2 * heatmaps[k, y, x] + |
|
|
heatmaps[k, y, x - 2]) |
|
|
dxy = 0.25 * ( |
|
|
heatmaps[k, y + 1, x + 1] - heatmaps[k, y - 1, x + 1] - |
|
|
heatmaps[k, y + 1, x - 1] + heatmaps[k, y - 1, x - 1]) |
|
|
dyy = 0.25 * ( |
|
|
heatmaps[k, y + 2, x] - 2 * heatmaps[k, y, x] + |
|
|
heatmaps[k, y - 2, x]) |
|
|
derivative = np.array([[dx], [dy]]) |
|
|
hessian = np.array([[dxx, dxy], [dxy, dyy]]) |
|
|
if dxx * dyy - dxy**2 != 0: |
|
|
hessianinv = np.linalg.inv(hessian) |
|
|
offset = -hessianinv @ derivative |
|
|
offset = np.squeeze(np.array(offset.T), axis=0) |
|
|
keypoints[n, k, :2] += offset |
|
|
return keypoints |
|
|
|
|
|
|
|
|
def refine_keypoints_dark_udp(keypoints: np.ndarray, heatmaps: np.ndarray, |
|
|
blur_kernel_size: int) -> np.ndarray: |
|
|
"""Refine keypoint predictions using distribution aware coordinate decoding |
|
|
for UDP. See `UDP`_ for details. The operation is in-place. |
|
|
|
|
|
Note: |
|
|
|
|
|
- instance number: N |
|
|
- keypoint number: K |
|
|
- keypoint dimension: D |
|
|
- heatmap size: [W, H] |
|
|
|
|
|
Args: |
|
|
keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D) |
|
|
heatmaps (np.ndarray): The heatmaps in shape (K, H, W) |
|
|
blur_kernel_size (int): The Gaussian blur kernel size of the heatmap |
|
|
modulation |
|
|
|
|
|
Returns: |
|
|
np.ndarray: Refine keypoint coordinates in shape (N, K, D) |
|
|
|
|
|
.. _`UDP`: https://arxiv.org/abs/1911.07524 |
|
|
""" |
|
|
N, K = keypoints.shape[:2] |
|
|
H, W = heatmaps.shape[1:] |
|
|
|
|
|
|
|
|
heatmaps = gaussian_blur(heatmaps, blur_kernel_size) |
|
|
np.clip(heatmaps, 1e-3, 50., heatmaps) |
|
|
np.log(heatmaps, heatmaps) |
|
|
|
|
|
heatmaps_pad = np.pad( |
|
|
heatmaps, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten() |
|
|
|
|
|
for n in range(N): |
|
|
index = keypoints[n, :, 0] + 1 + (keypoints[n, :, 1] + 1) * (W + 2) |
|
|
index += (W + 2) * (H + 2) * np.arange(0, K) |
|
|
index = index.astype(int).reshape(-1, 1) |
|
|
i_ = heatmaps_pad[index] |
|
|
ix1 = heatmaps_pad[index + 1] |
|
|
iy1 = heatmaps_pad[index + W + 2] |
|
|
ix1y1 = heatmaps_pad[index + W + 3] |
|
|
ix1_y1_ = heatmaps_pad[index - W - 3] |
|
|
ix1_ = heatmaps_pad[index - 1] |
|
|
iy1_ = heatmaps_pad[index - 2 - W] |
|
|
|
|
|
dx = 0.5 * (ix1 - ix1_) |
|
|
dy = 0.5 * (iy1 - iy1_) |
|
|
derivative = np.concatenate([dx, dy], axis=1) |
|
|
derivative = derivative.reshape(K, 2, 1) |
|
|
|
|
|
dxx = ix1 - 2 * i_ + ix1_ |
|
|
dyy = iy1 - 2 * i_ + iy1_ |
|
|
dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_) |
|
|
hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1) |
|
|
hessian = hessian.reshape(K, 2, 2) |
|
|
hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2)) |
|
|
keypoints[n] -= np.einsum('imn,ink->imk', hessian, |
|
|
derivative).squeeze() |
|
|
|
|
|
return keypoints |
|
|
|
|
|
|
|
|
def refine_simcc_dark(keypoints: np.ndarray, simcc: np.ndarray, |
|
|
blur_kernel_size: int) -> np.ndarray: |
|
|
"""SimCC version. Refine keypoint predictions using distribution aware |
|
|
coordinate decoding for UDP. See `UDP`_ for details. The operation is in- |
|
|
place. |
|
|
|
|
|
Note: |
|
|
|
|
|
- instance number: N |
|
|
- keypoint number: K |
|
|
- keypoint dimension: D |
|
|
|
|
|
Args: |
|
|
keypoints (np.ndarray): The keypoint coordinates in shape (N, K, D) |
|
|
simcc (np.ndarray): The heatmaps in shape (N, K, Wx) |
|
|
blur_kernel_size (int): The Gaussian blur kernel size of the heatmap |
|
|
modulation |
|
|
|
|
|
Returns: |
|
|
np.ndarray: Refine keypoint coordinates in shape (N, K, D) |
|
|
|
|
|
.. _`UDP`: https://arxiv.org/abs/1911.07524 |
|
|
""" |
|
|
N = simcc.shape[0] |
|
|
|
|
|
|
|
|
simcc = gaussian_blur1d(simcc, blur_kernel_size) |
|
|
np.clip(simcc, 1e-3, 50., simcc) |
|
|
np.log(simcc, simcc) |
|
|
|
|
|
simcc = np.pad(simcc, ((0, 0), (0, 0), (2, 2)), 'edge') |
|
|
|
|
|
for n in range(N): |
|
|
px = (keypoints[n] + 2.5).astype(np.int64).reshape(-1, 1) |
|
|
|
|
|
dx0 = np.take_along_axis(simcc[n], px, axis=1) |
|
|
dx1 = np.take_along_axis(simcc[n], px + 1, axis=1) |
|
|
dx_1 = np.take_along_axis(simcc[n], px - 1, axis=1) |
|
|
dx2 = np.take_along_axis(simcc[n], px + 2, axis=1) |
|
|
dx_2 = np.take_along_axis(simcc[n], px - 2, axis=1) |
|
|
|
|
|
dx = 0.5 * (dx1 - dx_1) |
|
|
dxx = 1e-9 + 0.25 * (dx2 - 2 * dx0 + dx_2) |
|
|
|
|
|
offset = dx / dxx |
|
|
keypoints[n] -= offset.reshape(-1) |
|
|
|
|
|
return keypoints |
|
|
|