# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple, Union

import numpy as np
import torch
from mmengine.logging import MessageHub
from torch import Tensor

from mmpose.evaluation.functional import keypoint_pck_accuracy
from mmpose.registry import MODELS
from mmpose.utils.tensor_utils import to_numpy
from mmpose.utils.typing import ConfigType, OptConfigType, OptSampleList

from .integral_regression_head import IntegralRegressionHead

OptIntSeq = Optional[Sequence[int]]
@MODELS.register_module()
class DSNTHead(IntegralRegressionHead):
    """Top-down integral regression head introduced in `DSNT`_ by Nibali et
    al. (2018). The head contains a differentiable spatial-to-numerical
    transform (DSNT) layer that performs a soft-argmax operation on the
    predicted heatmaps to regress the coordinates.

    This head is used for algorithms that require heatmap supervision in the
    `DSNT`_ approach.

    Args:
        in_channels (int | sequence[int]): Number of input channels
        in_featuremap_size (int | sequence[int]): Size of input feature map
        num_joints (int): Number of joints
        lambda_t (int): Discard the heatmap-based loss once the current
            epoch >= lambda_t. Defaults to -1, which keeps the heatmap loss
            throughout training.
        debias (bool): Whether to remove the bias of Integral Pose
            Regression. See `Removing the Bias of Integral Pose Regression`_
            by Gu et al. (2021). Defaults to ``False``.
        beta (float): A smoothing parameter in softmax. Defaults to ``1.0``.
        deconv_out_channels (sequence[int]): The output channel number of
            each deconv layer. Defaults to ``(256, 256, 256)``
        deconv_kernel_sizes (sequence[int | tuple], optional): The kernel
            size of each deconv layer. Each element should be either an
            integer for both height and width dimensions, or a tuple of two
            integers for the height and the width dimension respectively.
            Defaults to ``(4, 4, 4)``
        conv_out_channels (sequence[int], optional): The output channel
            number of each intermediate conv layer. ``None`` means no
            intermediate conv layer between deconv layers and the final conv
            layer. Defaults to ``None``
        conv_kernel_sizes (sequence[int | tuple], optional): The kernel size
            of each intermediate conv layer. Defaults to ``None``
        final_layer (dict): Arguments of the final Conv2d layer.
            Defaults to ``dict(kernel_size=1)``
        loss (Config): Config for keypoint loss. Defaults to a
            :class:`MultipleLossWrapper` combining :class:`SmoothL1Loss` on
            coordinates and :class:`JSDiscretLoss` on heatmaps
        decoder (Config, optional): The decoder config that controls decoding
            keypoint coordinates from the network output. Defaults to
            ``None``
        init_cfg (Config, optional): Config to control the initialization.
            See :attr:`default_init_cfg` for default settings

    .. _`DSNT`: https://arxiv.org/abs/1801.07372
    """
    _version = 2

    def __init__(self,
                 in_channels: Union[int, Sequence[int]],
                 in_featuremap_size: Tuple[int, int],
                 num_joints: int,
                 lambda_t: int = -1,
                 debias: bool = False,
                 beta: float = 1.0,
                 deconv_out_channels: OptIntSeq = (256, 256, 256),
                 deconv_kernel_sizes: OptIntSeq = (4, 4, 4),
                 conv_out_channels: OptIntSeq = None,
                 conv_kernel_sizes: OptIntSeq = None,
                 final_layer: dict = dict(kernel_size=1),
                 loss: ConfigType = dict(
                     type='MultipleLossWrapper',
                     losses=[
                         dict(type='SmoothL1Loss', use_target_weight=True),
                         dict(type='JSDiscretLoss', use_target_weight=True)
                     ]),
                 decoder: OptConfigType = None,
                 init_cfg: OptConfigType = None):

        super().__init__(
            in_channels=in_channels,
            in_featuremap_size=in_featuremap_size,
            num_joints=num_joints,
            debias=debias,
            beta=beta,
            deconv_out_channels=deconv_out_channels,
            deconv_kernel_sizes=deconv_kernel_sizes,
            conv_out_channels=conv_out_channels,
            conv_kernel_sizes=conv_kernel_sizes,
            final_layer=final_layer,
            loss=loss,
            decoder=decoder,
            init_cfg=init_cfg)

        self.lambda_t = lambda_t
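
    # For orientation, the DSNT soft-argmax applied by the inherited
    # ``forward()`` can be sketched as follows. This is a minimal
    # illustration under assumed (B, K, H, W) heatmap logits, not the
    # actual IntegralRegressionHead implementation:
    #
    #     probs = torch.softmax(beta * heatmaps.flatten(2), dim=-1)
    #     probs = probs.reshape(B, K, H, W)
    #     xs = torch.linspace(0, 1, W)               # normalized x grid
    #     ys = torch.linspace(0, 1, H)               # normalized y grid
    #     x = (probs.sum(dim=2) * xs).sum(dim=-1)    # E[x] under probs
    #     y = (probs.sum(dim=3) * ys).sum(dim=-1)    # E[y] under probs
    #     coords = torch.stack([x, y], dim=-1)       # (B, K, 2)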
    def loss(self,
             inputs: Tuple[Tensor],
             batch_data_samples: OptSampleList,
             train_cfg: ConfigType = {}) -> dict:
        """Calculate losses from a batch of inputs and data samples."""
        pred_coords, pred_heatmaps = self.forward(inputs)
        keypoint_labels = torch.cat(
            [d.gt_instance_labels.keypoint_labels for d in batch_data_samples])
        keypoint_weights = torch.cat([
            d.gt_instance_labels.keypoint_weights for d in batch_data_samples
        ])
        gt_heatmaps = torch.stack(
            [d.gt_fields.heatmaps for d in batch_data_samples])

        input_list = [pred_coords, pred_heatmaps]
        target_list = [keypoint_labels, gt_heatmaps]

        # calculate losses: loss_list[0] supervises the regressed
        # coordinates and loss_list[1] supervises the heatmaps
        losses = dict()
        loss_list = self.loss_module(input_list, target_list,
                                     keypoint_weights)

        loss = loss_list[0] + loss_list[1]

        # once the current epoch reaches ``lambda_t``, drop the
        # heatmap-based term and keep only the coordinate loss
        if self.lambda_t > 0:
            mh = MessageHub.get_current_instance()
            cur_epoch = mh.get_info('epoch')
            if cur_epoch >= self.lambda_t:
                loss = loss_list[0]

        losses.update(loss_kpt=loss)

        # calculate accuracy (PCK in the normalized coordinate space,
        # hence the unit normalization factor)
        _, avg_acc, _ = keypoint_pck_accuracy(
            pred=to_numpy(pred_coords),
            gt=to_numpy(keypoint_labels),
            mask=to_numpy(keypoint_weights) > 0,
            thr=0.05,
            norm_factor=np.ones((pred_coords.size(0), 2), dtype=np.float32))

        acc_pose = torch.tensor(avg_acc, device=keypoint_labels.device)
        losses.update(acc_pose=acc_pose)

        return losses
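

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the upstream file). It shows how
# this head is typically configured inside an mmpose model config; the
# channel/joint numbers and the ``lambda_t`` schedule below are assumptions
# picked for the example:
#
#     head = dict(
#         type='DSNTHead',
#         in_channels=2048,
#         in_featuremap_size=(8, 8),
#         num_joints=17,
#         lambda_t=30,  # keep only the coordinate loss from epoch 30 on
#         loss=dict(
#             type='MultipleLossWrapper',
#             losses=[
#                 dict(type='SmoothL1Loss', use_target_weight=True),
#                 dict(type='JSDiscretLoss', use_target_weight=True),
#             ]))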