Upload folder using huggingface_hub

789eef1 verified 6 months ago

14.1 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	from typing import Optional, Tuple

	import numpy as np

	from mmpose.codecs.utils import get_heatmap_maximum, get_simcc_maximum
	from .mesh_eval import compute_similarity_transform


	def _calc_distances(preds: np.ndarray, gts: np.ndarray, mask: np.ndarray,
	norm_factor: np.ndarray) -> np.ndarray:
	"""Calculate the normalized distances between preds and target.

	Note:
	- instance number: N
	- keypoint number: K
	- keypoint dimension: D (normally, D=2 or D=3)

	Args:
	preds (np.ndarray[N, K, D]): Predicted keypoint location.
	gts (np.ndarray[N, K, D]): Groundtruth keypoint location.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.
	norm_factor (np.ndarray[N, D]): Normalization factor.
	Typical value is heatmap_size.

	Returns:
	np.ndarray[K, N]: The normalized distances. \
	If target keypoints are missing, the distance is -1.
	"""
	N, K, _ = preds.shape
	# set mask=0 when norm_factor==0
	_mask = mask.copy()
	_mask[np.where((norm_factor == 0).sum(1))[0], :] = False

	distances = np.full((N, K), -1, dtype=np.float32)
	# handle invalid values
	norm_factor[np.where(norm_factor <= 0)] = 1e6
	distances[_mask] = np.linalg.norm(
	((preds - gts) / norm_factor[:, None, :])[_mask], axis=-1)
	return distances.T


	def _distance_acc(distances: np.ndarray, thr: float = 0.5) -> float:
	"""Return the percentage below the distance threshold, while ignoring
	distances values with -1.

	Note:
	- instance number: N

	Args:
	distances (np.ndarray[N, ]): The normalized distances.
	thr (float): Threshold of the distances.

	Returns:
	float: Percentage of distances below the threshold. \
	If all target keypoints are missing, return -1.
	"""
	distance_valid = distances != -1
	num_distance_valid = distance_valid.sum()
	if num_distance_valid > 0:
	return (distances[distance_valid] < thr).sum() / num_distance_valid
	return -1


	def keypoint_pck_accuracy(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray,
	thr: np.ndarray, norm_factor: np.ndarray) -> tuple:
	"""Calculate the pose accuracy of PCK for each individual keypoint and the
	averaged accuracy across all keypoints for coordinates.

	Note:
	PCK metric measures accuracy of the localization of the body joints.
	The distances between predicted positions and the ground-truth ones
	are typically normalized by the bounding box size.
	The threshold (thr) of the normalized distance is commonly set
	as 0.05, 0.1 or 0.2 etc.

	- instance number: N
	- keypoint number: K

	Args:
	pred (np.ndarray[N, K, 2]): Predicted keypoint location.
	gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.
	thr (float): Threshold of PCK calculation.
	norm_factor (np.ndarray[N, 2]): Normalization factor for H&W.

	Returns:
	tuple: A tuple containing keypoint accuracy.

	- acc (np.ndarray[K]): Accuracy of each keypoint.
	- avg_acc (float): Averaged accuracy across all keypoints.
	- cnt (int): Number of valid keypoints.
	"""
	distances = _calc_distances(pred, gt, mask, norm_factor)
	acc = np.array([_distance_acc(d, thr) for d in distances])
	valid_acc = acc[acc >= 0]
	cnt = len(valid_acc)
	avg_acc = valid_acc.mean() if cnt > 0 else 0.0
	return acc, avg_acc, cnt


	def keypoint_auc(pred: np.ndarray,
	gt: np.ndarray,
	mask: np.ndarray,
	norm_factor: np.ndarray,
	num_thrs: int = 20) -> float:
	"""Calculate the Area under curve (AUC) of keypoint PCK accuracy.

	Note:
	- instance number: N
	- keypoint number: K

	Args:
	pred (np.ndarray[N, K, 2]): Predicted keypoint location.
	gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.
	norm_factor (float): Normalization factor.
	num_thrs (int): number of thresholds to calculate auc.

	Returns:
	float: Area under curve (AUC) of keypoint PCK accuracy.
	"""
	nor = np.tile(np.array([[norm_factor, norm_factor]]), (pred.shape[0], 1))
	thrs = [1.0 * i / num_thrs for i in range(num_thrs)]
	avg_accs = []
	for thr in thrs:
	_, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
	avg_accs.append(avg_acc)

	auc = 0
	for i in range(num_thrs):
	auc += 1.0 / num_thrs * avg_accs[i]
	return auc


	def keypoint_nme(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray,
	normalize_factor: np.ndarray) -> float:
	"""Calculate the normalized mean error (NME).

	Note:
	- instance number: N
	- keypoint number: K

	Args:
	pred (np.ndarray[N, K, 2]): Predicted keypoint location.
	gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.
	normalize_factor (np.ndarray[N, 2]): Normalization factor.

	Returns:
	float: normalized mean error
	"""
	distances = _calc_distances(pred, gt, mask, normalize_factor)
	distance_valid = distances[distances != -1]
	return distance_valid.sum() / max(1, len(distance_valid))


	def keypoint_epe(pred: np.ndarray, gt: np.ndarray, mask: np.ndarray) -> float:
	"""Calculate the end-point error.

	Note:
	- instance number: N
	- keypoint number: K

	Args:
	pred (np.ndarray[N, K, 2]): Predicted keypoint location.
	gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.

	Returns:
	float: Average end-point error.
	"""

	distances = _calc_distances(
	pred, gt, mask,
	np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32))
	distance_valid = distances[distances != -1]
	return distance_valid.sum() / max(1, len(distance_valid))


	def pose_pck_accuracy(output: np.ndarray,
	target: np.ndarray,
	mask: np.ndarray,
	thr: float = 0.05,
	normalize: Optional[np.ndarray] = None) -> tuple:
	"""Calculate the pose accuracy of PCK for each individual keypoint and the
	averaged accuracy across all keypoints from heatmaps.

	Note:
	PCK metric measures accuracy of the localization of the body joints.
	The distances between predicted positions and the ground-truth ones
	are typically normalized by the bounding box size.
	The threshold (thr) of the normalized distance is commonly set
	as 0.05, 0.1 or 0.2 etc.

	- batch_size: N
	- num_keypoints: K
	- heatmap height: H
	- heatmap width: W

	Args:
	output (np.ndarray[N, K, H, W]): Model output heatmaps.
	target (np.ndarray[N, K, H, W]): Groundtruth heatmaps.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.
	thr (float): Threshold of PCK calculation. Default 0.05.
	normalize (np.ndarray[N, 2]): Normalization factor for H&W.

	Returns:
	tuple: A tuple containing keypoint accuracy.

	- np.ndarray[K]: Accuracy of each keypoint.
	- float: Averaged accuracy across all keypoints.
	- int: Number of valid keypoints.
	"""
	N, K, H, W = output.shape
	if K == 0:
	return None, 0, 0
	if normalize is None:
	normalize = np.tile(np.array([[H, W]]), (N, 1))

	pred, _ = get_heatmap_maximum(output)
	gt, _ = get_heatmap_maximum(target)
	return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)


	def simcc_pck_accuracy(output: Tuple[np.ndarray, np.ndarray],
	target: Tuple[np.ndarray, np.ndarray],
	simcc_split_ratio: float,
	mask: np.ndarray,
	thr: float = 0.05,
	normalize: Optional[np.ndarray] = None) -> tuple:
	"""Calculate the pose accuracy of PCK for each individual keypoint and the
	averaged accuracy across all keypoints from SimCC.

	Note:
	PCK metric measures accuracy of the localization of the body joints.
	The distances between predicted positions and the ground-truth ones
	are typically normalized by the bounding box size.
	The threshold (thr) of the normalized distance is commonly set
	as 0.05, 0.1 or 0.2 etc.

	- instance number: N
	- keypoint number: K

	Args:
	output (Tuple[np.ndarray, np.ndarray]): Model predicted SimCC.
	target (Tuple[np.ndarray, np.ndarray]): Groundtruth SimCC.
	mask (np.ndarray[N, K]): Visibility of the target. False for invisible
	joints, and True for visible. Invisible joints will be ignored for
	accuracy calculation.
	thr (float): Threshold of PCK calculation. Default 0.05.
	normalize (np.ndarray[N, 2]): Normalization factor for H&W.

	Returns:
	tuple: A tuple containing keypoint accuracy.

	- np.ndarray[K]: Accuracy of each keypoint.
	- float: Averaged accuracy across all keypoints.
	- int: Number of valid keypoints.
	"""
	pred_x, pred_y = output
	gt_x, gt_y = target

	N, _, Wx = pred_x.shape
	_, _, Wy = pred_y.shape
	W, H = int(Wx / simcc_split_ratio), int(Wy / simcc_split_ratio)

	if normalize is None:
	normalize = np.tile(np.array([[H, W]]), (N, 1))

	pred_coords, _ = get_simcc_maximum(pred_x, pred_y)
	pred_coords /= simcc_split_ratio
	gt_coords, _ = get_simcc_maximum(gt_x, gt_y)
	gt_coords /= simcc_split_ratio

	return keypoint_pck_accuracy(pred_coords, gt_coords, mask, thr, normalize)


	def multilabel_classification_accuracy(pred: np.ndarray,
	gt: np.ndarray,
	mask: np.ndarray,
	thr: float = 0.5) -> float:
	"""Get multi-label classification accuracy.

	Note:
	- batch size: N
	- label number: L

	Args:
	pred (np.ndarray[N, L, 2]): model predicted labels.
	gt (np.ndarray[N, L, 2]): ground-truth labels.
	mask (np.ndarray[N, 1] or np.ndarray[N, L] ): reliability of
	ground-truth labels.
	thr (float): Threshold for calculating accuracy.

	Returns:
	float: multi-label classification accuracy.
	"""
	# we only compute accuracy on the samples with ground-truth of all labels.
	valid = (mask > 0).min(axis=1) if mask.ndim == 2 else (mask > 0)
	pred, gt = pred[valid], gt[valid]

	if pred.shape[0] == 0:
	acc = 0.0 # when no sample is with gt labels, set acc to 0.
	else:
	# The classification of a sample is regarded as correct
	# only if it's correct for all labels.
	acc = (((pred - thr) * (gt - thr)) > 0).all(axis=1).mean()
	return acc


	def keypoint_mpjpe(pred: np.ndarray,
	gt: np.ndarray,
	mask: np.ndarray,
	alignment: str = 'none'):
	"""Calculate the mean per-joint position error (MPJPE) and the error after
	rigid alignment with the ground truth (P-MPJPE).

	Note:
	- batch_size: N
	- num_keypoints: K
	- keypoint_dims: C

	Args:
	pred (np.ndarray): Predicted keypoint location with shape [N, K, C].
	gt (np.ndarray): Groundtruth keypoint location with shape [N, K, C].
	mask (np.ndarray): Visibility of the target with shape [N, K].
	False for invisible joints, and True for visible.
	Invisible joints will be ignored for accuracy calculation.
	alignment (str, optional): method to align the prediction with the
	groundtruth. Supported options are:

	- ``'none'``: no alignment will be applied
	- ``'scale'``: align in the least-square sense in scale
	- ``'procrustes'``: align in the least-square sense in
	scale, rotation and translation.

	Returns:
	tuple: A tuple containing joint position errors

	- (float \| np.ndarray): mean per-joint position error (mpjpe).
	- (float \| np.ndarray): mpjpe after rigid alignment with the
	ground truth (p-mpjpe).
	"""
	assert mask.any()

	if alignment == 'none':
	pass
	elif alignment == 'procrustes':
	pred = np.stack([
	compute_similarity_transform(pred_i, gt_i)
	for pred_i, gt_i in zip(pred, gt)
	])
	elif alignment == 'scale':
	pred_dot_pred = np.einsum('nkc,nkc->n', pred, pred)
	pred_dot_gt = np.einsum('nkc,nkc->n', pred, gt)
	scale_factor = pred_dot_gt / pred_dot_pred
	pred = pred * scale_factor[:, None, None]
	else:
	raise ValueError(f'Invalid value for alignment: {alignment}')
	error = np.linalg.norm(pred - gt, ord=2, axis=-1)[mask].mean()

	return error