idol-models / submodule /sapiens /pose /mmpose /codecs /decoupled_heatmap.py

Upload folder using huggingface_hub

789eef1 verified 6 months ago

10.5 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	import random
	from typing import Optional, Tuple

	import numpy as np

	from mmpose.registry import KEYPOINT_CODECS
	from .base import BaseKeypointCodec
	from .utils import (generate_gaussian_heatmaps, get_diagonal_lengths,
	get_instance_bbox, get_instance_root)
	from .utils.post_processing import get_heatmap_maximum
	from .utils.refinement import refine_keypoints


	@KEYPOINT_CODECS.register_module()
	class DecoupledHeatmap(BaseKeypointCodec):
	    """Encode/decode keypoints with the method introduced in the paper CID.

	    See the paper `CID`_ (Contextual Instance Decoupling for Robust
	    Multi-Person Pose Estimation) by Wang et al (2022) for details.

	    Note:

	        - instance number: N
	        - number of instances selected for encoding: M
	        - keypoint number: K
	        - keypoint dimension: D
	        - image size: [w, h]
	        - heatmap size: [W, H]

	    Encoded:
	        - heatmaps (np.ndarray): The coupled heatmap in shape
	            (1+K, H, W) where [W, H] is the `heatmap_size`.
	        - instance_heatmaps (np.ndarray): The decoupled heatmap in shape
	            (M*K, H, W) where M is the number of selected instances.
	        - keypoint_weights (np.ndarray): The weight for heatmaps in shape
	            (M*K).
	        - instance_coords (np.ndarray): The coordinates of instance roots
	            in shape (M, 2)

	    Args:
	        input_size (tuple): Image size in [w, h]
	        heatmap_size (tuple): Heatmap size in [W, H]
	        root_type (str): The method to generate the instance root. Options
	            are:

	            - ``'kpt_center'``: Average coordinate of all visible keypoints.
	            - ``'bbox_center'``: Center point of bounding boxes outlined by
	                all visible keypoints.

	            Defaults to ``'kpt_center'``

	        heatmap_min_overlap (float): Minimum overlap rate among instances.
	            Used when calculating sigmas for instances. Defaults to 0.7
	        encode_max_instances (int): The maximum number of instances
	            to encode for each sample. Defaults to 30

	    .. _`CID`: https://openaccess.thecvf.com/content/CVPR2022/html/Wang_
	        Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_
	        CVPR_2022_paper.html
	    """

	    # DecoupledHeatmap requires bounding boxes to determine the size of each
	    # instance, so that it can assign varying sigmas based on their size
	    auxiliary_encode_keys = {'bbox'}

	    def __init__(
	        self,
	        input_size: Tuple[int, int],
	        heatmap_size: Tuple[int, int],
	        root_type: str = 'kpt_center',
	        heatmap_min_overlap: float = 0.7,
	        encode_max_instances: int = 30,
	    ):
	        super().__init__()

	        self.input_size = input_size
	        self.heatmap_size = heatmap_size
	        self.root_type = root_type
	        self.encode_max_instances = encode_max_instances
	        self.heatmap_min_overlap = heatmap_min_overlap

	        # Per-axis ratio between image space and heatmap space; dividing by
	        # it maps image coordinates to heatmap coordinates and multiplying
	        # maps them back (see ``encode`` / ``decode``).
	        self.scale_factor = (np.array(input_size) /
	                             heatmap_size).astype(np.float32)

	    def _get_instance_wise_sigmas(
	        self,
	        bbox: np.ndarray,
	    ) -> np.ndarray:
	        """Get sigma values for each instance according to their size.

	        The height of an instance is the distance between corners 0 and 1
	        of its box, and the width is the distance between corners 0 and 2.

	        Args:
	            bbox (np.ndarray): Bounding box in shape (N, 4, 2)

	        Returns:
	            np.ndarray: Array containing the sigma values for each instance.
	        """
	        sigmas = np.zeros((bbox.shape[0], ), dtype=np.float32)

	        heights = np.sqrt(np.power(bbox[:, 0] - bbox[:, 1], 2).sum(axis=-1))
	        widths = np.sqrt(np.power(bbox[:, 0] - bbox[:, 2], 2).sum(axis=-1))

	        overlap = self.heatmap_min_overlap

	        # NOTE(review): the three conditions below look like the
	        # CornerNet-style gaussian radius computation; each solves a
	        # quadratic a*r^2 + b*r + c and keeps ``(b + sqrt(b^2 - 4ac)) / 2``
	        # exactly as written here - confirm against the reference before
	        # changing the formula.
	        for i, (h, w) in enumerate(zip(heights, widths)):
	            # condition 1
	            a1, b1 = 1, h + w
	            c1 = w * h * (1 - overlap) / (1 + overlap)
	            sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
	            r1 = (b1 + sq1) / 2

	            # condition 2
	            a2 = 4
	            b2 = 2 * (h + w)
	            c2 = (1 - overlap) * w * h
	            sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
	            r2 = (b2 + sq2) / 2

	            # condition 3
	            a3 = 4 * overlap
	            b3 = -2 * overlap * (h + w)
	            c3 = (overlap - 1) * w * h
	            sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
	            r3 = (b3 + sq3) / 2

	            # the smallest radius satisfies all conditions; sigma is a
	            # third of it
	            sigmas[i] = min(r1, r2, r3) / 3

	        return sigmas

	    def encode(self,
	               keypoints: np.ndarray,
	               keypoints_visible: Optional[np.ndarray] = None,
	               bbox: Optional[np.ndarray] = None) -> dict:
	        """Encode keypoints into heatmaps.

	        Args:
	            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
	            keypoints_visible (np.ndarray): Keypoint visibilities in shape
	                (N, K). If ``None``, all keypoints are treated as visible.
	            bbox (np.ndarray): Bounding box in shape (N, 8) which includes
	                coordinates of 4 corners (it is reshaped to (N, 4, 2)).
	                If ``None``, a pseudo box is derived from the visible
	                keypoints.

	        Returns:
	            dict:
	            - heatmaps (np.ndarray): The coupled heatmap in shape
	                (1+K, H, W) where [W, H] is the `heatmap_size`.
	            - instance_heatmaps (np.ndarray): The decoupled heatmap in shape
	                (M*K, H, W) where M is the number of selected instances
	                (at most ``encode_max_instances``).
	            - keypoint_weights (np.ndarray): The weight for heatmaps in shape
	                (M*K).
	            - instance_coords (np.ndarray): The coordinates of instance roots
	                in shape (M, 2)
	        """

	        if keypoints_visible is None:
	            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)
	        if bbox is None:
	            # generate pseudo bbox via visible keypoints
	            bbox = get_instance_bbox(keypoints, keypoints_visible)
	            bbox = np.tile(bbox, 2).reshape(-1, 4, 2)
	            # corner order: left_top, left_bottom, right_top, right_bottom
	            bbox[:, 1:3, 0] = bbox[:, 0:2, 0]

	        # keypoint coordinates in heatmap
	        _keypoints = keypoints / self.scale_factor
	        _bbox = bbox.reshape(-1, 4, 2) / self.scale_factor

	        # compute the root and scale of each instance
	        roots, roots_visible = get_instance_root(_keypoints, keypoints_visible,
	                                                 self.root_type)

	        sigmas = self._get_instance_wise_sigmas(_bbox)

	        # generate global heatmaps; the instance root is appended as one
	        # extra "keypoint" channel after the K real keypoints
	        heatmaps, keypoint_weights = generate_gaussian_heatmaps(
	            heatmap_size=self.heatmap_size,
	            keypoints=np.concatenate((_keypoints, roots[:, None]), axis=1),
	            keypoints_visible=np.concatenate(
	                (keypoints_visible, roots_visible[:, None]), axis=1),
	            sigma=sigmas)
	        # the last weight column belongs to the root channel added above
	        roots_visible = keypoint_weights[:, -1]

	        # select instances
	        inst_roots, inst_indices = [], []
	        diagonal_lengths = get_diagonal_lengths(_keypoints, keypoints_visible)
	        for i in np.argsort(diagonal_lengths):
	            if roots_visible[i] < 1:
	                continue
	            # rand root point in 3x3 grid
	            x, y = roots[i] + np.random.randint(-1, 2, (2, ))
	            # clamp the jittered root to the heatmap bounds
	            x = max(0, min(x, self.heatmap_size[0] - 1))
	            y = max(0, min(y, self.heatmap_size[1] - 1))
	            # keep only one instance per root location
	            if (x, y) not in inst_roots:
	                inst_roots.append((x, y))
	                inst_indices.append(i)
	        if len(inst_indices) > self.encode_max_instances:
	            # too many candidates: keep a random subset of the allowed size
	            rand_indices = random.sample(
	                range(len(inst_indices)), self.encode_max_instances)
	            inst_roots = [inst_roots[i] for i in rand_indices]
	            inst_indices = [inst_indices[i] for i in rand_indices]

	        # generate instance-wise heatmaps
	        inst_heatmaps, inst_heatmap_weights = [], []
	        for i in inst_indices:
	            inst_heatmap, inst_heatmap_weight = generate_gaussian_heatmaps(
	                heatmap_size=self.heatmap_size,
	                keypoints=_keypoints[i:i + 1],
	                keypoints_visible=keypoints_visible[i:i + 1],
	                sigma=sigmas[i].item())
	            inst_heatmaps.append(inst_heatmap)
	            inst_heatmap_weights.append(inst_heatmap_weight)

	        if len(inst_indices) > 0:
	            inst_heatmaps = np.concatenate(inst_heatmaps)
	            inst_heatmap_weights = np.concatenate(inst_heatmap_weights)
	            inst_roots = np.array(inst_roots, dtype=np.int32)
	        else:
	            # no instance selected: return empty arrays instead of lists
	            inst_heatmaps = np.empty((0, *self.heatmap_size[::-1]))
	            inst_heatmap_weights = np.empty((0, ))
	            inst_roots = np.empty((0, 2), dtype=np.int32)

	        encoded = dict(
	            heatmaps=heatmaps,
	            instance_heatmaps=inst_heatmaps,
	            keypoint_weights=inst_heatmap_weights,
	            instance_coords=inst_roots)

	        return encoded

	    def decode(self, instance_heatmaps: np.ndarray,
	               instance_scores: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
	        """Decode keypoint coordinates from decoupled heatmaps. The decoded
	        keypoint coordinates are in the input image space.

	        Args:
	            instance_heatmaps (np.ndarray): Heatmaps in shape (N, K, H, W)
	            instance_scores (np.ndarray): Confidence of instance roots
	                prediction in shape (N, 1)

	        Returns:
	            tuple:
	            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
	                (N, K, D)
	            - scores (np.ndarray): The keypoint scores in shape (N, K). It
	                usually represents the confidence of the keypoint prediction
	        """
	        keypoints, keypoint_scores = [], []

	        for i in range(instance_heatmaps.shape[0]):
	            # work on a copy so the caller's heatmaps are left untouched
	            heatmaps = instance_heatmaps[i].copy()
	            kpts, scores = get_heatmap_maximum(heatmaps)
	            keypoints.append(refine_keypoints(kpts[None], heatmaps))
	            keypoint_scores.append(scores[None])

	        keypoints = np.concatenate(keypoints)
	        # Restore the keypoint scale
	        keypoints = keypoints * self.scale_factor

	        # weight each keypoint score by the confidence of its instance root
	        keypoint_scores = np.concatenate(keypoint_scores)
	        keypoint_scores *= instance_scores

	        return keypoints, keypoint_scores