Upload folder using huggingface_hub

789eef1 verified 9 months ago

7.88 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	from typing import Optional, Tuple

	import cv2
	import numpy as np

	from mmpose.registry import KEYPOINT_CODECS
	from .base import BaseKeypointCodec
	from .utils import (generate_offset_heatmap, generate_udp_gaussian_heatmaps,
	get_heatmap_maximum, refine_keypoints_dark_udp)


	@KEYPOINT_CODECS.register_module()
	class UDPHeatmap(BaseKeypointCodec):
	r"""Generate keypoint heatmaps by Unbiased Data Processing (UDP).
	See the paper: `The Devil is in the Details: Delving into Unbiased Data
	Processing for Human Pose Estimation`_ by Huang et al (2020) for details.

	Note:

	- instance number: N
	- keypoint number: K
	- keypoint dimension: D
	- image size: [w, h]
	- heatmap size: [W, H]

	Encoded:

	- heatmap (np.ndarray): The generated heatmap in shape (C_out, H, W)
	where [W, H] is the `heatmap_size`, and the C_out is the output
	channel number which depends on the `heatmap_type`. If
	`heatmap_type=='gaussian'`, C_out equals to keypoint number K;
	if `heatmap_type=='combined'`, C_out equals to K*3
	(x_offset, y_offset and class label)
	- keypoint_weights (np.ndarray): The target weights in shape (K,)

	Args:
	input_size (tuple): Image size in [w, h]
	heatmap_size (tuple): Heatmap size in [W, H]
	heatmap_type (str): The heatmap type to encode the keypoitns. Options
	are:

	- ``'gaussian'``: Gaussian heatmap
	- ``'combined'``: Combination of a binary label map and offset
	maps for X and Y axes.

	sigma (float): The sigma value of the Gaussian heatmap when
	``heatmap_type=='gaussian'``. Defaults to 2.0
	radius_factor (float): The radius factor of the binary label
	map when ``heatmap_type=='combined'``. The positive region is
	defined as the neighbor of the keypoit with the radius
	:math:`r=radius_factor*max(W, H)`. Defaults to 0.0546875
	blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
	modulation in DarkPose. Defaults to 11

	.. _`The Devil is in the Details: Delving into Unbiased Data Processing for
	Human Pose Estimation`: https://arxiv.org/abs/1911.07524
	"""

	def __init__(self,
	input_size: Tuple[int, int],
	heatmap_size: Tuple[int, int],
	heatmap_type: str = 'gaussian',
	sigma: float = 2.,
	radius_factor: float = 0.0546875,
	blur_kernel_size: int = 11) -> None:
	super().__init__()
	self.input_size = input_size
	self.heatmap_size = heatmap_size
	self.sigma = sigma
	self.radius_factor = radius_factor
	self.heatmap_type = heatmap_type
	self.blur_kernel_size = blur_kernel_size
	self.scale_factor = ((np.array(input_size) - 1) /
	(np.array(heatmap_size) - 1)).astype(np.float32)

	if self.heatmap_type not in {'gaussian', 'combined'}:
	raise ValueError(
	f'{self.__class__.__name__} got invalid `heatmap_type` value'
	f'{self.heatmap_type}. Should be one of '
	'{"gaussian", "combined"}')

	def encode(self,
	keypoints: np.ndarray,
	keypoints_visible: Optional[np.ndarray] = None) -> dict:
	"""Encode keypoints into heatmaps. Note that the original keypoint
	coordinates should be in the input image space.

	Args:
	keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
	keypoints_visible (np.ndarray): Keypoint visibilities in shape
	(N, K)

	Returns:
	dict:
	- heatmap (np.ndarray): The generated heatmap in shape
	(C_out, H, W) where [W, H] is the `heatmap_size`, and the
	C_out is the output channel number which depends on the
	`heatmap_type`. If `heatmap_type=='gaussian'`, C_out equals to
	keypoint number K; if `heatmap_type=='combined'`, C_out
	equals to K*3 (x_offset, y_offset and class label)
	- keypoint_weights (np.ndarray): The target weights in shape
	(K,)
	"""
	assert keypoints.shape[0] == 1, (
	f'{self.__class__.__name__} only support single-instance '
	'keypoint encoding')

	if keypoints_visible is None:
	keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

	if self.heatmap_type == 'gaussian':
	heatmaps, keypoint_weights = generate_udp_gaussian_heatmaps(
	heatmap_size=self.heatmap_size,
	keypoints=keypoints / self.scale_factor,
	keypoints_visible=keypoints_visible,
	sigma=self.sigma)
	elif self.heatmap_type == 'combined':
	heatmaps, keypoint_weights = generate_offset_heatmap(
	heatmap_size=self.heatmap_size,
	keypoints=keypoints / self.scale_factor,
	keypoints_visible=keypoints_visible,
	radius_factor=self.radius_factor)
	else:
	raise ValueError(
	f'{self.__class__.__name__} got invalid `heatmap_type` value'
	f'{self.heatmap_type}. Should be one of '
	'{"gaussian", "combined"}')

	encoded = dict(heatmaps=heatmaps, keypoint_weights=keypoint_weights)

	return encoded

	def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
	"""Decode keypoint coordinates from heatmaps. The decoded keypoint
	coordinates are in the input image space.

	Args:
	encoded (np.ndarray): Heatmaps in shape (K, H, W)

	Returns:
	tuple:
	- keypoints (np.ndarray): Decoded keypoint coordinates in shape
	(N, K, D)
	- scores (np.ndarray): The keypoint scores in shape (N, K). It
	usually represents the confidence of the keypoint prediction
	"""
	heatmaps = encoded.copy()

	if self.heatmap_type == 'gaussian':
	keypoints, scores = get_heatmap_maximum(heatmaps)
	# unsqueeze the instance dimension for single-instance results
	keypoints = keypoints[None]
	scores = scores[None]

	keypoints = refine_keypoints_dark_udp(
	keypoints, heatmaps, blur_kernel_size=self.blur_kernel_size)

	elif self.heatmap_type == 'combined':
	_K, H, W = heatmaps.shape
	K = _K // 3

	for cls_heatmap in heatmaps[::3]:
	# Apply Gaussian blur on classification maps
	ks = 2 * self.blur_kernel_size + 1
	cv2.GaussianBlur(cls_heatmap, (ks, ks), 0, cls_heatmap)

	# valid radius
	radius = self.radius_factor * max(W, H)

	x_offset = heatmaps[1::3].flatten() * radius
	y_offset = heatmaps[2::3].flatten() * radius
	keypoints, scores = get_heatmap_maximum(heatmaps=heatmaps[::3])
	index = (keypoints[..., 0] + keypoints[..., 1] * W).flatten()
	index += W * H * np.arange(0, K)
	index = index.astype(int)
	keypoints += np.stack((x_offset[index], y_offset[index]), axis=-1)
	# unsqueeze the instance dimension for single-instance results
	keypoints = keypoints[None].astype(np.float32)
	scores = scores[None]

	W, H = self.heatmap_size
	keypoints = keypoints / [W - 1, H - 1] * self.input_size

	return keypoints, scores