# NOTE: removed non-Python web-page artifact ("Spaces: Running on Zero")
# that preceded the source; it is not part of the module.
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import math | |
| from functools import partial | |
| from typing import Optional | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from mmpose.datasets.datasets.utils import parse_pose_metainfo | |
| from mmpose.registry import MODELS | |
| from ..utils.realnvp import RealNVP | |
class RLELoss(nn.Module):
    """RLE Loss.

    `Human Pose Regression With Residual Log-Likelihood Estimation
    arXiv: <https://arxiv.org/abs/2107.11291>`_.

    Code is modified from `the official implementation
    <https://github.com/Jeff-sjtu/res-loglikelihood-regression>`_.

    Args:
        use_target_weight (bool): Option to use weighted loss.
            Different joint types may have different target weights.
        size_average (bool): Option to average the loss by the batch_size.
        residual (bool): Option to add L1 loss and let the flow
            learn the residual error distribution.
        q_distribution (str): Option for the identity Q(error) distribution,
            Options: "laplace" or "gaussian"
    """

    def __init__(self,
                 use_target_weight=False,
                 size_average=True,
                 residual=True,
                 q_distribution='laplace'):
        super().__init__()
        self.size_average = size_average
        self.use_target_weight = use_target_weight
        self.residual = residual
        self.q_distribution = q_distribution
        # Normalizing flow that models the error distribution.
        self.flow_model = RealNVP()

    def forward(self, pred, sigma, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (only D=2 is supported by the
              reshape to ``(-1, 2)`` below)

        Args:
            pred (Tensor[N, K, D]): Output regression.
            sigma (Tensor[N, K, D]): Output sigma.
            target (Tensor[N, K, D]): Target regression.
            target_weight (Tensor[N, K, D]):
                Weights across different joint types.
        """
        sigma = sigma.sigmoid()
        # Error normalized by the predicted scale; eps avoids div-by-zero.
        error = (pred - target) / (sigma + 1e-9)
        batch, num_kpts = target.shape[0], target.shape[1]
        # Log-density of the normalized error under the learned flow.
        log_phi = self.flow_model.log_prob(error.reshape(-1, 2))
        log_phi = log_phi.reshape(batch, num_kpts, 1)
        log_sigma = torch.log(sigma).reshape(batch, num_kpts, 2)
        nf_loss = log_sigma - log_phi

        if not self.residual:
            loss = nf_loss
        else:
            assert self.q_distribution in ['laplace', 'gaussian']
            if self.q_distribution == 'laplace':
                # Negative log-likelihood of a Laplace Q(error).
                loss_q = torch.log(sigma * 2) + torch.abs(error)
            else:
                # Negative log-likelihood of a Gaussian Q(error).
                loss_q = torch.log(
                    sigma * math.sqrt(2 * math.pi)) + 0.5 * error**2
            loss = nf_loss + loss_q

        if self.use_target_weight:
            assert target_weight is not None
            loss = loss * target_weight

        if self.size_average:
            loss = loss / len(loss)

        return loss.sum()
class SmoothL1Loss(nn.Module):
    """SmoothL1Loss loss.

    Args:
        use_target_weight (bool): Option to use weighted MSE loss.
            Different joint types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self, use_target_weight=False, loss_weight=1.):
        super().__init__()
        self.criterion = F.smooth_l1_loss
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N, K, D]):
                Weights across different joint types.
        """
        if not self.use_target_weight:
            return self.criterion(output, target) * self.loss_weight

        assert target_weight is not None
        assert output.ndim >= target_weight.ndim
        # Append trailing singleton dims so the weight broadcasts over
        # the coordinate dimension(s), e.g. [N, K] -> [N, K, 1].
        while target_weight.ndim < output.ndim:
            target_weight = target_weight.unsqueeze(-1)
        loss = self.criterion(output * target_weight, target * target_weight)
        return loss * self.loss_weight
class L1LogLoss(nn.Module):
    """L1LogLoss loss.

    Args:
        use_target_weight (bool): Option to use weighted MSE loss.
            Different joint types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self, use_target_weight=False, loss_weight=1.):
        super().__init__()
        # NOTE(review): despite the "L1" name, the criterion is smooth-L1 —
        # confirm this is intended.
        self.criterion = F.smooth_l1_loss
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N, K, D]):
                Weights across different joint types.
        """
        # Compare in log-space so the loss reflects relative error.
        output = torch.log(1 + output)
        target = torch.log(1 + target)

        if not self.use_target_weight:
            return self.criterion(output, target) * self.loss_weight

        assert target_weight is not None
        assert output.ndim >= target_weight.ndim
        # Broadcast the weight over trailing coordinate dims.
        while target_weight.ndim < output.ndim:
            target_weight = target_weight.unsqueeze(-1)
        loss = self.criterion(output * target_weight, target * target_weight)
        return loss * self.loss_weight
class SoftWeightSmoothL1Loss(nn.Module):
    """Smooth L1 loss with soft weight for regression.

    Args:
        use_target_weight (bool): Option to use weighted MSE loss.
            Different joint types may have different target weights.
        supervise_empty (bool): Whether to supervise the output with zero
            weight.
        beta (float): Specifies the threshold at which to change between
            L1 and L2 loss.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self,
                 use_target_weight=False,
                 supervise_empty=True,
                 beta=1.0,
                 loss_weight=1.):
        super().__init__()
        # With per-keypoint weights the reduction is done manually in
        # ``forward``; otherwise let the criterion average directly.
        reduction = 'none' if use_target_weight else 'mean'
        self.criterion = partial(
            self.smooth_l1_loss, reduction=reduction, beta=beta)

        self.supervise_empty = supervise_empty
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

    @staticmethod
    def smooth_l1_loss(input, target, reduction='none', beta=1.0):
        """Re-implement torch.nn.functional.smooth_l1_loss with beta to
        support pytorch <= 1.6.

        BUGFIX: this must be a ``@staticmethod``. Without the decorator,
        ``self.smooth_l1_loss`` in ``__init__`` is a bound method and the
        module instance would be passed as ``input``, crashing at call time.

        Args:
            input (torch.Tensor): Predicted values.
            target (torch.Tensor): Ground-truth values.
            reduction (str): 'none', 'mean' or 'sum'.
            beta (float): L1/L2 transition point.
        """
        delta = input - target
        mask = delta.abs() < beta
        # Quadratic branch for small residuals, shifted-L1 otherwise.
        delta[mask] = (delta[mask]).pow(2) / (2 * beta)
        delta[~mask] = delta[~mask].abs() - beta / 2

        if reduction == 'mean':
            return delta.mean()
        elif reduction == 'sum':
            return delta.sum()
        elif reduction == 'none':
            return delta
        else:
            raise ValueError(f'reduction must be \'mean\', \'sum\' or '
                             f'\'none\', but got \'{reduction}\'')

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N, K, D]):
                Weights across different joint types.
        """
        if self.use_target_weight:
            assert target_weight is not None
            assert output.ndim >= target_weight.ndim

            # Broadcast the weight over trailing coordinate dims.
            for i in range(output.ndim - target_weight.ndim):
                target_weight = target_weight.unsqueeze(-1)

            loss = self.criterion(output, target) * target_weight
            if self.supervise_empty:
                loss = loss.mean()
            else:
                # Average only over positively-weighted elements.
                num_elements = torch.nonzero(target_weight > 0).size()[0]
                loss = loss.sum() / max(num_elements, 1.0)
        else:
            loss = self.criterion(output, target)

        return loss * self.loss_weight
class WingLoss(nn.Module):
    """Wing Loss. paper ref: 'Wing Loss for Robust Facial Landmark
    Localisation with Convolutional Neural Networks' Feng et al. CVPR'2018.

    Args:
        omega (float): Also referred to as width.
        epsilon (float): Also referred to as curvature.
        use_target_weight (bool): Option to use weighted MSE loss.
            Different joint types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self,
                 omega=10.0,
                 epsilon=2.0,
                 use_target_weight=False,
                 loss_weight=1.):
        super().__init__()
        self.omega = omega
        self.epsilon = epsilon
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

        # Offset that makes the log and linear branches join continuously
        # at |delta| == omega.
        self.C = self.omega * (1.0 - math.log(1.0 + self.omega / self.epsilon))

    def criterion(self, pred, target):
        """Criterion of wingloss.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            pred (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
        """
        abs_err = (target - pred).abs()
        # Logarithmic branch near zero, shifted-L1 branch far from zero.
        log_branch = self.omega * torch.log(1.0 + abs_err / self.epsilon)
        linear_branch = abs_err - self.C
        per_elem = torch.where(abs_err < self.omega, log_branch, linear_branch)
        # Sum per sample over keypoints/dims, then average over the batch.
        return per_elem.sum(dim=[1, 2]).mean(dim=0)

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N,K,D]):
                Weights across different joint types.
        """
        if not self.use_target_weight:
            return self.criterion(output, target) * self.loss_weight

        assert target_weight is not None
        loss = self.criterion(output * target_weight, target * target_weight)
        return loss * self.loss_weight
class SoftWingLoss(nn.Module):
    """Soft Wing Loss 'Structure-Coherent Deep Feature Learning for Robust
    Face Alignment' Lin et al. TIP'2021.

    loss =
        1. |x|                           , if |x| < omega1
        2. omega2*ln(1+|x|/epsilon) + B  , if |x| >= omega1

    Args:
        omega1 (float): The first threshold.
        omega2 (float): The second threshold.
        epsilon (float): Also referred to as curvature.
        use_target_weight (bool): Option to use weighted MSE loss.
            Different joint types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self,
                 omega1=2.0,
                 omega2=20.0,
                 epsilon=0.5,
                 use_target_weight=False,
                 loss_weight=1.):
        super().__init__()
        self.omega1 = omega1
        self.omega2 = omega2
        self.epsilon = epsilon
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

        # Offset that makes the linear and log branches join continuously
        # at |delta| == omega1.
        self.B = self.omega1 - self.omega2 * math.log(1.0 + self.omega1 /
                                                      self.epsilon)

    def criterion(self, pred, target):
        """Criterion of wingloss.

        Note:
            batch_size: N
            num_keypoints: K
            dimension of keypoints: D (D=2 or D=3)

        Args:
            pred (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
        """
        abs_err = (target - pred).abs()
        # Plain L1 near zero, damped log branch for large residuals.
        log_branch = self.omega2 * torch.log(
            1.0 + abs_err / self.epsilon) + self.B
        per_elem = torch.where(abs_err < self.omega1, abs_err, log_branch)
        # Sum per sample over keypoints/dims, then average over the batch.
        return per_elem.sum(dim=[1, 2]).mean(dim=0)

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            batch_size: N
            num_keypoints: K
            dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N, K, D]):
                Weights across different joint types.
        """
        if not self.use_target_weight:
            return self.criterion(output, target) * self.loss_weight

        assert target_weight is not None
        loss = self.criterion(output * target_weight, target * target_weight)
        return loss * self.loss_weight
class MPJPEVelocityJointLoss(nn.Module):
    """MPJPE (Mean Per Joint Position Error) loss combined with an N-MPJPE
    term and a temporal velocity term.

    Args:
        use_target_weight (bool): Option to use weighted loss.
            Different joint types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
        lambda_scale (float): Factor of the N-MPJPE loss. Default: 0.5.
        lambda_3d_velocity (float): Factor of the velocity loss. Default: 20.0.
    """

    def __init__(self,
                 use_target_weight=False,
                 loss_weight=1.,
                 lambda_scale=0.5,
                 lambda_3d_velocity=20.0):
        super().__init__()
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight
        self.lambda_scale = lambda_scale
        self.lambda_3d_velocity = lambda_3d_velocity

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)
            - indexing ``[..., 1:, :, :]`` implies a temporal axis at
              dim -3, i.e. inputs shaped like [..., T, K, D]

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N,K,D]):
                Weights across different joint types.
        """
        # <output, output> and <target, output> averaged over keypoints;
        # their ratio is the least-squares scale used by N-MPJPE.
        norm_output = torch.mean(
            torch.sum(torch.square(output), dim=-1, keepdim=True),
            dim=-2,
            keepdim=True)
        norm_target = torch.mean(
            torch.sum(target * output, dim=-1, keepdim=True),
            dim=-2,
            keepdim=True)

        # Finite differences along the temporal axis (dim -3).
        velocity_output = output[..., 1:, :, :] - output[..., :-1, :, :]
        velocity_target = target[..., 1:, :, :] - target[..., :-1, :, :]

        diff_pose = output - target
        diff_scaled = norm_target / norm_output * output - target
        diff_velocity = velocity_output - velocity_target
        if self.use_target_weight:
            assert target_weight is not None
            diff_pose = diff_pose * target_weight
            diff_scaled = diff_scaled * target_weight
            diff_velocity = diff_velocity * target_weight

        mpjpe = torch.norm(diff_pose, dim=-1).mean()
        nmpjpe = torch.norm(diff_scaled, dim=-1).mean()
        loss_3d_velocity = torch.norm(diff_velocity, dim=-1).mean()

        loss = mpjpe + nmpjpe * self.lambda_scale + \
            loss_3d_velocity * self.lambda_3d_velocity

        return loss * self.loss_weight
class MPJPELoss(nn.Module):
    """MPJPE (Mean Per Joint Position Error) loss.

    Args:
        use_target_weight (bool): Option to use weighted MSE loss.
            Different joint types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self, use_target_weight=False, loss_weight=1.):
        super().__init__()
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N,K,D]):
                Weights across different joint types.
        """
        diff = output - target
        if self.use_target_weight:
            assert target_weight is not None
            diff = diff * target_weight
        # Per-joint Euclidean distance, averaged over all joints and samples.
        loss = torch.norm(diff, dim=-1).mean()
        return loss * self.loss_weight
class L1Loss(nn.Module):
    """L1Loss loss.

    Args:
        reduction (str): 'mean', 'sum' or 'none'.
        use_target_weight (bool): Option to use weighted loss.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self,
                 reduction='mean',
                 use_target_weight=False,
                 loss_weight=1.):
        super().__init__()

        assert reduction in ('mean', 'sum', 'none'), f'the argument ' \
            f'`reduction` should be either \'mean\', \'sum\' or \'none\', ' \
            f'but got {reduction}'

        self.criterion = partial(F.l1_loss, reduction=reduction)
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K

        Args:
            output (torch.Tensor[N, K, 2]): Output regression.
            target (torch.Tensor[N, K, 2]): Target regression.
            target_weight (torch.Tensor[N, K, 2]):
                Weights across different joint types.
        """
        if not self.use_target_weight:
            return self.criterion(output, target) * self.loss_weight

        assert target_weight is not None
        # Broadcast the weight over trailing coordinate dims.
        while target_weight.ndim < target.ndim:
            target_weight = target_weight.unsqueeze(-1)
        loss = self.criterion(output * target_weight, target * target_weight)
        return loss * self.loss_weight
class MSELoss(nn.Module):
    """MSE loss for coordinate regression.

    Args:
        use_target_weight (bool): Option to use weighted loss.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self, use_target_weight=False, loss_weight=1.):
        super().__init__()
        self.criterion = F.mse_loss
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K

        Args:
            output (torch.Tensor[N, K, 2]): Output regression.
            target (torch.Tensor[N, K, 2]): Target regression.
            target_weight (torch.Tensor[N, K, 2]):
                Weights across different joint types.
        """
        if not self.use_target_weight:
            return self.criterion(output, target) * self.loss_weight

        assert target_weight is not None
        loss = self.criterion(output * target_weight, target * target_weight)
        return loss * self.loss_weight
class BoneLoss(nn.Module):
    """Bone length loss.

    Args:
        joint_parents (list): Indices of each joint's parent joint.
        use_target_weight (bool): Option to use weighted bone loss.
            Different bone types may have different target weights.
        loss_weight (float): Weight of the loss. Default: 1.0.
    """

    def __init__(self, joint_parents, use_target_weight=False, loss_weight=1.):
        super().__init__()
        self.joint_parents = joint_parents
        self.use_target_weight = use_target_weight
        self.loss_weight = loss_weight

        # A joint that is its own parent is a root and carries no bone.
        self.non_root_indices = [
            i for i, parent in enumerate(self.joint_parents) if i != parent
        ]

    def forward(self, output, target, target_weight=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_keypoints: K
            - dimension of keypoints: D (D=2 or D=3)

        Args:
            output (torch.Tensor[N, K, D]): Output regression.
            target (torch.Tensor[N, K, D]): Target regression.
            target_weight (torch.Tensor[N, K-1]):
                Weights across different bone types.
        """

        def bone_lengths(joints):
            # Distance from each joint to its parent, keeping only
            # non-root joints; shape (N, K-1).
            return torch.norm(
                joints - joints[:, self.joint_parents, :],
                dim=-1)[:, self.non_root_indices]

        output_bone = bone_lengths(output)
        target_bone = bone_lengths(target)
        if self.use_target_weight:
            assert target_weight is not None
            output_bone = output_bone * target_weight
            target_bone = target_bone * target_weight
        # Compare mean bone lengths over the batch, averaged over bones.
        loss = torch.mean(
            torch.abs(output_bone.mean(dim=0) - target_bone.mean(dim=0)))
        return loss * self.loss_weight
class SemiSupervisionLoss(nn.Module):
    """Semi-supervision loss for unlabeled data. It is composed of projection
    loss and bone loss.

    Paper ref: `3D human pose estimation in video with temporal convolutions
    and semi-supervised training` Dario Pavllo et al. CVPR'2019.

    Args:
        joint_parents (list): Indices of each joint's parent joint.
        projection_loss_weight (float): Weight for projection loss.
        bone_loss_weight (float): Weight for bone loss.
        warmup_iterations (int): Number of warmup iterations. In the first
            `warmup_iterations` iterations, the model is trained only on
            labeled data, and semi-supervision loss will be 0.
            This is a workaround since currently we cannot access
            epoch number in loss functions. Note that the iteration number in
            an epoch can be changed due to different GPU numbers in multi-GPU
            settings. So please set this parameter carefully.
            warmup_iterations = dataset_size // samples_per_gpu // gpu_num
            * warmup_epochs
    """

    def __init__(self,
                 joint_parents,
                 projection_loss_weight=1.,
                 bone_loss_weight=1.,
                 warmup_iterations=0):
        super().__init__()
        self.criterion_projection = MPJPELoss(
            loss_weight=projection_loss_weight)
        self.criterion_bone = BoneLoss(
            joint_parents, loss_weight=bone_loss_weight)
        self.warmup_iterations = warmup_iterations
        # Counter incremented on every forward call; used for warmup.
        self.num_iterations = 0

    @staticmethod
    def project_joints(x, intrinsics):
        """Project 3D joint coordinates to 2D image plane using camera
        intrinsic parameters.

        BUGFIX: declared as ``@staticmethod``. Without the decorator,
        ``self.project_joints(...)`` would pass the module instance as
        ``x`` and raise a ``TypeError``.

        Args:
            x (torch.Tensor[N, K, 3]): 3D joint coordinates.
            intrinsics (torch.Tensor[N, 4] | torch.Tensor[N, 9]): Camera
                intrinsics: f (2), c (2), k (3), p (2).
        """
        # NOTE: ``unsqueeze_`` reshapes the caller's ``intrinsics`` tensor
        # in place so it broadcasts against ``x``.
        while intrinsics.dim() < x.dim():
            intrinsics.unsqueeze_(1)
        f = intrinsics[..., :2]
        c = intrinsics[..., 2:4]
        # Perspective division, clamped to the normalized image plane.
        _x = torch.clamp(x[:, :, :2] / x[:, :, 2:], -1, 1)
        if intrinsics.shape[-1] == 9:
            # Apply radial (k) and tangential (p) lens distortion.
            k = intrinsics[..., 4:7]
            p = intrinsics[..., 7:9]
            r2 = torch.sum(_x[:, :, :2]**2, dim=-1, keepdim=True)
            radial = 1 + torch.sum(
                k * torch.cat((r2, r2**2, r2**3), dim=-1),
                dim=-1,
                keepdim=True)
            tan = torch.sum(p * _x, dim=-1, keepdim=True)
            _x = _x * (radial + tan) + p * r2
        _x = f * _x + c
        return _x

    def forward(self, output, target):
        """Compute projection and bone losses for unlabeled data.

        Returns an empty dict during warmup iterations.
        """
        losses = dict()

        self.num_iterations += 1
        if self.num_iterations <= self.warmup_iterations:
            return losses

        labeled_pose = output['labeled_pose']
        unlabeled_pose = output['unlabeled_pose']
        unlabeled_traj = output['unlabeled_traj']
        unlabeled_target_2d = target['unlabeled_target_2d']
        intrinsics = target['intrinsics']

        # projection loss
        unlabeled_output = unlabeled_pose + unlabeled_traj
        unlabeled_output_2d = self.project_joints(unlabeled_output, intrinsics)
        loss_proj = self.criterion_projection(unlabeled_output_2d,
                                              unlabeled_target_2d, None)
        losses['proj_loss'] = loss_proj

        # bone loss
        loss_bone = self.criterion_bone(unlabeled_pose, labeled_pose, None)
        losses['bone_loss'] = loss_bone

        return losses
class OKSLoss(nn.Module):
    """A PyTorch implementation of the Object Keypoint Similarity (OKS) loss
    as described in the paper "YOLO-Pose: Enhancing YOLO for Multi Person Pose
    Estimation Using Object Keypoint Similarity Loss" by Debapriya et al.
    (2022).

    The OKS loss is used for keypoint-based object recognition and consists
    of a measure of the similarity between predicted and ground truth
    keypoint locations, adjusted by the size of the object in the image.

    The loss function takes as input the predicted keypoint locations, the
    ground truth keypoint locations, a mask indicating which keypoints are
    valid, and bounding boxes for the objects.

    Args:
        metainfo (Optional[str]): Path to a JSON file containing information
            about the dataset's annotations.
        reduction (str): Options are "none", "mean" and "sum".
        eps (float): Epsilon to avoid log(0).
        loss_weight (float): Weight of the loss. Default: 1.0.
        mode (str): Loss scaling mode, including "linear", "square", and
            "log". Default: 'linear'
        norm_target_weight (bool): whether to normalize the target weight
            with number of visible keypoints. Defaults to False.
    """

    def __init__(self,
                 metainfo: Optional[str] = None,
                 reduction='mean',
                 mode='linear',
                 eps=1e-8,
                 norm_target_weight=False,
                 loss_weight=1.):
        super().__init__()

        assert reduction in ('mean', 'sum', 'none'), f'the argument ' \
            f'`reduction` should be either \'mean\', \'sum\' or \'none\', ' \
            f'but got {reduction}'

        # BUGFIX: the message previously referred to `reduction` instead of
        # `mode`, producing a misleading assertion error.
        assert mode in ('linear', 'square', 'log'), f'the argument ' \
            f'`mode` should be either \'linear\', \'square\' or ' \
            f'\'log\', but got {mode}'

        self.reduction = reduction
        self.loss_weight = loss_weight
        self.mode = mode
        self.norm_target_weight = norm_target_weight
        self.eps = eps

        if metainfo is not None:
            metainfo = parse_pose_metainfo(dict(from_file=metainfo))
            sigmas = metainfo.get('sigmas', None)
            if sigmas is not None:
                # Per-keypoint falloff constants from the dataset metainfo.
                self.register_buffer('sigmas', torch.as_tensor(sigmas))

    def forward(self, output, target, target_weight=None, areas=None):
        """Forward function.

        Note:
            - batch_size: N
            - num_labels: K

        Args:
            output (torch.Tensor[N, K, 2]): Output keypoints coordinates.
            target (torch.Tensor[N, K, 2]): Target keypoints coordinates..
            target_weight (torch.Tensor[N, K]): Loss weight for each keypoint.
            areas (torch.Tensor[N]): Instance size which is adopted as
                normalization factor.
        """
        dist = torch.norm(output - target, dim=-1)
        if areas is not None:
            # Normalize distances by the square root of the instance area.
            dist = dist / areas.pow(0.5).clip(min=self.eps).unsqueeze(-1)
        if hasattr(self, 'sigmas'):
            sigmas = self.sigmas.reshape(*((1, ) * (dist.ndim - 1)), -1)
            dist = dist / (sigmas * 2)

        # Gaussian similarity: 1 at zero distance, decaying with distance.
        oks = torch.exp(-dist.pow(2) / 2)

        if target_weight is not None:
            if self.norm_target_weight:
                # Normalize by the (weighted) number of visible keypoints.
                target_weight = target_weight / target_weight.sum(
                    dim=-1, keepdims=True).clip(min=self.eps)
            else:
                target_weight = target_weight / target_weight.size(-1)
            oks = oks * target_weight
        oks = oks.sum(dim=-1)

        if self.mode == 'linear':
            loss = 1 - oks
        elif self.mode == 'square':
            loss = 1 - oks.pow(2)
        elif self.mode == 'log':
            loss = -oks.log()
        else:
            raise NotImplementedError()

        if self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()

        return loss * self.loss_weight