| |
| from logging import warning |
| from math import ceil, log |
| from typing import List, Optional, Sequence, Tuple |
|
|
| import torch |
| import torch.nn as nn |
| from mmcv.cnn import ConvModule |
| from mmcv.ops import CornerPool, batched_nms |
| from mmengine.config import ConfigDict |
| from mmengine.model import BaseModule, bias_init_with_prob |
| from mmengine.structures import InstanceData |
| from torch import Tensor |
|
|
| from mmdet.registry import MODELS |
| from mmdet.utils import (ConfigType, InstanceList, OptConfigType, |
| OptInstanceList, OptMultiConfig) |
| from ..utils import (gather_feat, gaussian_radius, gen_gaussian_target, |
| get_local_maximum, get_topk_from_heatmap, multi_apply, |
| transpose_and_gather_feat) |
| from .base_dense_head import BaseDenseHead |
|
|
|
|
| class BiCornerPool(BaseModule): |
| """Bidirectional Corner Pooling Module (TopLeft, BottomRight, etc.) |
| |
| Args: |
| in_channels (int): Input channels of module. |
| directions (list[str]): Directions of two CornerPools. |
| out_channels (int): Output channels of module. |
| feat_channels (int): Feature channels of module. |
| norm_cfg (:obj:`ConfigDict` or dict): Dictionary to construct |
| and config norm layer. |
| init_cfg (:obj:`ConfigDict` or dict, optional): the config to |
| control the initialization. |
| """ |
|
|
| def __init__(self, |
| in_channels: int, |
| directions: List[int], |
| feat_channels: int = 128, |
| out_channels: int = 128, |
| norm_cfg: ConfigType = dict(type='BN', requires_grad=True), |
| init_cfg: OptMultiConfig = None) -> None: |
| super().__init__(init_cfg) |
| self.direction1_conv = ConvModule( |
| in_channels, feat_channels, 3, padding=1, norm_cfg=norm_cfg) |
| self.direction2_conv = ConvModule( |
| in_channels, feat_channels, 3, padding=1, norm_cfg=norm_cfg) |
|
|
| self.aftpool_conv = ConvModule( |
| feat_channels, |
| out_channels, |
| 3, |
| padding=1, |
| norm_cfg=norm_cfg, |
| act_cfg=None) |
|
|
| self.conv1 = ConvModule( |
| in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None) |
| self.conv2 = ConvModule( |
| in_channels, out_channels, 3, padding=1, norm_cfg=norm_cfg) |
|
|
| self.direction1_pool = CornerPool(directions[0]) |
| self.direction2_pool = CornerPool(directions[1]) |
| self.relu = nn.ReLU(inplace=True) |
|
|
| def forward(self, x: Tensor) -> Tensor: |
| """Forward features from the upstream network. |
| |
| Args: |
| x (tensor): Input feature of BiCornerPool. |
| |
| Returns: |
| conv2 (tensor): Output feature of BiCornerPool. |
| """ |
| direction1_conv = self.direction1_conv(x) |
| direction2_conv = self.direction2_conv(x) |
| direction1_feat = self.direction1_pool(direction1_conv) |
| direction2_feat = self.direction2_pool(direction2_conv) |
| aftpool_conv = self.aftpool_conv(direction1_feat + direction2_feat) |
| conv1 = self.conv1(x) |
| relu = self.relu(aftpool_conv + conv1) |
| conv2 = self.conv2(relu) |
| return conv2 |
|
|
|
|
| @MODELS.register_module() |
| class CornerHead(BaseDenseHead): |
| """Head of CornerNet: Detecting Objects as Paired Keypoints. |
| |
| Code is modified from the `official github repo |
| <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/ |
| kp.py#L73>`_ . |
| |
| More details can be found in the `paper |
| <https://arxiv.org/abs/1808.01244>`_ . |
| |
| Args: |
| num_classes (int): Number of categories excluding the background |
| category. |
| in_channels (int): Number of channels in the input feature map. |
| num_feat_levels (int): Levels of feature from the previous module. |
| 2 for HourglassNet-104 and 1 for HourglassNet-52. Because |
| HourglassNet-104 outputs the final feature and intermediate |
| supervision feature and HourglassNet-52 only outputs the final |
| feature. Defaults to 2. |
| corner_emb_channels (int): Channel of embedding vector. Defaults to 1. |
| train_cfg (:obj:`ConfigDict` or dict, optional): Training config. |
| Useless in CornerHead, but we keep this variable for |
| SingleStageDetector. |
| test_cfg (:obj:`ConfigDict` or dict, optional): Testing config of |
| CornerHead. |
| loss_heatmap (:obj:`ConfigDict` or dict): Config of corner heatmap |
| loss. Defaults to GaussianFocalLoss. |
| loss_embedding (:obj:`ConfigDict` or dict): Config of corner embedding |
| loss. Defaults to AssociativeEmbeddingLoss. |
| loss_offset (:obj:`ConfigDict` or dict): Config of corner offset loss. |
| Defaults to SmoothL1Loss. |
| init_cfg (:obj:`ConfigDict` or dict, optional): the config to control |
| the initialization. |
| """ |
|
|
| def __init__(self, |
| num_classes: int, |
| in_channels: int, |
| num_feat_levels: int = 2, |
| corner_emb_channels: int = 1, |
| train_cfg: OptConfigType = None, |
| test_cfg: OptConfigType = None, |
| loss_heatmap: ConfigType = dict( |
| type='GaussianFocalLoss', |
| alpha=2.0, |
| gamma=4.0, |
| loss_weight=1), |
| loss_embedding: ConfigType = dict( |
| type='AssociativeEmbeddingLoss', |
| pull_weight=0.25, |
| push_weight=0.25), |
| loss_offset: ConfigType = dict( |
| type='SmoothL1Loss', beta=1.0, loss_weight=1), |
| init_cfg: OptMultiConfig = None) -> None: |
| assert init_cfg is None, 'To prevent abnormal initialization ' \ |
| 'behavior, init_cfg is not allowed to be set' |
| super().__init__(init_cfg=init_cfg) |
| self.num_classes = num_classes |
| self.in_channels = in_channels |
| self.corner_emb_channels = corner_emb_channels |
| self.with_corner_emb = self.corner_emb_channels > 0 |
| self.corner_offset_channels = 2 |
| self.num_feat_levels = num_feat_levels |
| self.loss_heatmap = MODELS.build( |
| loss_heatmap) if loss_heatmap is not None else None |
| self.loss_embedding = MODELS.build( |
| loss_embedding) if loss_embedding is not None else None |
| self.loss_offset = MODELS.build( |
| loss_offset) if loss_offset is not None else None |
| self.train_cfg = train_cfg |
| self.test_cfg = test_cfg |
|
|
| self._init_layers() |
|
|
| def _make_layers(self, |
| out_channels: int, |
| in_channels: int = 256, |
| feat_channels: int = 256) -> nn.Sequential: |
| """Initialize conv sequential for CornerHead.""" |
| return nn.Sequential( |
| ConvModule(in_channels, feat_channels, 3, padding=1), |
| ConvModule( |
| feat_channels, out_channels, 1, norm_cfg=None, act_cfg=None)) |
|
|
| def _init_corner_kpt_layers(self) -> None: |
| """Initialize corner keypoint layers. |
| |
| Including corner heatmap branch and corner offset branch. Each branch |
| has two parts: prefix `tl_` for top-left and `br_` for bottom-right. |
| """ |
| self.tl_pool, self.br_pool = nn.ModuleList(), nn.ModuleList() |
| self.tl_heat, self.br_heat = nn.ModuleList(), nn.ModuleList() |
| self.tl_off, self.br_off = nn.ModuleList(), nn.ModuleList() |
|
|
| for _ in range(self.num_feat_levels): |
| self.tl_pool.append( |
| BiCornerPool( |
| self.in_channels, ['top', 'left'], |
| out_channels=self.in_channels)) |
| self.br_pool.append( |
| BiCornerPool( |
| self.in_channels, ['bottom', 'right'], |
| out_channels=self.in_channels)) |
|
|
| self.tl_heat.append( |
| self._make_layers( |
| out_channels=self.num_classes, |
| in_channels=self.in_channels)) |
| self.br_heat.append( |
| self._make_layers( |
| out_channels=self.num_classes, |
| in_channels=self.in_channels)) |
|
|
| self.tl_off.append( |
| self._make_layers( |
| out_channels=self.corner_offset_channels, |
| in_channels=self.in_channels)) |
| self.br_off.append( |
| self._make_layers( |
| out_channels=self.corner_offset_channels, |
| in_channels=self.in_channels)) |
|
|
| def _init_corner_emb_layers(self) -> None: |
| """Initialize corner embedding layers. |
| |
| Only include corner embedding branch with two parts: prefix `tl_` for |
| top-left and `br_` for bottom-right. |
| """ |
| self.tl_emb, self.br_emb = nn.ModuleList(), nn.ModuleList() |
|
|
| for _ in range(self.num_feat_levels): |
| self.tl_emb.append( |
| self._make_layers( |
| out_channels=self.corner_emb_channels, |
| in_channels=self.in_channels)) |
| self.br_emb.append( |
| self._make_layers( |
| out_channels=self.corner_emb_channels, |
| in_channels=self.in_channels)) |
|
|
| def _init_layers(self) -> None: |
| """Initialize layers for CornerHead. |
| |
| Including two parts: corner keypoint layers and corner embedding layers |
| """ |
| self._init_corner_kpt_layers() |
| if self.with_corner_emb: |
| self._init_corner_emb_layers() |
|
|
| def init_weights(self) -> None: |
| super().init_weights() |
| bias_init = bias_init_with_prob(0.1) |
| for i in range(self.num_feat_levels): |
| |
| |
| |
| |
| self.tl_heat[i][-1].conv.reset_parameters() |
| self.tl_heat[i][-1].conv.bias.data.fill_(bias_init) |
| self.br_heat[i][-1].conv.reset_parameters() |
| self.br_heat[i][-1].conv.bias.data.fill_(bias_init) |
| self.tl_off[i][-1].conv.reset_parameters() |
| self.br_off[i][-1].conv.reset_parameters() |
| if self.with_corner_emb: |
| self.tl_emb[i][-1].conv.reset_parameters() |
| self.br_emb[i][-1].conv.reset_parameters() |
|
|
| def forward(self, feats: Tuple[Tensor]) -> tuple: |
| """Forward features from the upstream network. |
| |
| Args: |
| feats (tuple[Tensor]): Features from the upstream network, each is |
| a 4D-tensor. |
| |
| Returns: |
| tuple: Usually a tuple of corner heatmaps, offset heatmaps and |
| embedding heatmaps. |
| - tl_heats (list[Tensor]): Top-left corner heatmaps for all |
| levels, each is a 4D-tensor, the channels number is |
| num_classes. |
| - br_heats (list[Tensor]): Bottom-right corner heatmaps for all |
| levels, each is a 4D-tensor, the channels number is |
| num_classes. |
| - tl_embs (list[Tensor] | list[None]): Top-left embedding |
| heatmaps for all levels, each is a 4D-tensor or None. |
| If not None, the channels number is corner_emb_channels. |
| - br_embs (list[Tensor] | list[None]): Bottom-right embedding |
| heatmaps for all levels, each is a 4D-tensor or None. |
| If not None, the channels number is corner_emb_channels. |
| - tl_offs (list[Tensor]): Top-left offset heatmaps for all |
| levels, each is a 4D-tensor. The channels number is |
| corner_offset_channels. |
| - br_offs (list[Tensor]): Bottom-right offset heatmaps for all |
| levels, each is a 4D-tensor. The channels number is |
| corner_offset_channels. |
| """ |
| lvl_ind = list(range(self.num_feat_levels)) |
| return multi_apply(self.forward_single, feats, lvl_ind) |
|
|
| def forward_single(self, |
| x: Tensor, |
| lvl_ind: int, |
| return_pool: bool = False) -> List[Tensor]: |
| """Forward feature of a single level. |
| |
| Args: |
| x (Tensor): Feature of a single level. |
| lvl_ind (int): Level index of current feature. |
| return_pool (bool): Return corner pool feature or not. |
| Defaults to False. |
| |
| Returns: |
| tuple[Tensor]: A tuple of CornerHead's output for current feature |
| level. Containing the following Tensors: |
| |
| - tl_heat (Tensor): Predicted top-left corner heatmap. |
| - br_heat (Tensor): Predicted bottom-right corner heatmap. |
| - tl_emb (Tensor | None): Predicted top-left embedding heatmap. |
| None for `self.with_corner_emb == False`. |
| - br_emb (Tensor | None): Predicted bottom-right embedding |
| heatmap. None for `self.with_corner_emb == False`. |
| - tl_off (Tensor): Predicted top-left offset heatmap. |
| - br_off (Tensor): Predicted bottom-right offset heatmap. |
| - tl_pool (Tensor): Top-left corner pool feature. Not must |
| have. |
| - br_pool (Tensor): Bottom-right corner pool feature. Not must |
| have. |
| """ |
| tl_pool = self.tl_pool[lvl_ind](x) |
| tl_heat = self.tl_heat[lvl_ind](tl_pool) |
| br_pool = self.br_pool[lvl_ind](x) |
| br_heat = self.br_heat[lvl_ind](br_pool) |
|
|
| tl_emb, br_emb = None, None |
| if self.with_corner_emb: |
| tl_emb = self.tl_emb[lvl_ind](tl_pool) |
| br_emb = self.br_emb[lvl_ind](br_pool) |
|
|
| tl_off = self.tl_off[lvl_ind](tl_pool) |
| br_off = self.br_off[lvl_ind](br_pool) |
|
|
| result_list = [tl_heat, br_heat, tl_emb, br_emb, tl_off, br_off] |
| if return_pool: |
| result_list.append(tl_pool) |
| result_list.append(br_pool) |
|
|
| return result_list |
|
|
| def get_targets(self, |
| gt_bboxes: List[Tensor], |
| gt_labels: List[Tensor], |
| feat_shape: Sequence[int], |
| img_shape: Sequence[int], |
| with_corner_emb: bool = False, |
| with_guiding_shift: bool = False, |
| with_centripetal_shift: bool = False) -> dict: |
| """Generate corner targets. |
| |
| Including corner heatmap, corner offset. |
| |
| Optional: corner embedding, corner guiding shift, centripetal shift. |
| |
| For CornerNet, we generate corner heatmap, corner offset and corner |
| embedding from this function. |
| |
| For CentripetalNet, we generate corner heatmap, corner offset, guiding |
| shift and centripetal shift from this function. |
| |
| Args: |
| gt_bboxes (list[Tensor]): Ground truth bboxes of each image, each |
| has shape (num_gt, 4). |
| gt_labels (list[Tensor]): Ground truth labels of each box, each has |
| shape (num_gt, ). |
| feat_shape (Sequence[int]): Shape of output feature, |
| [batch, channel, height, width]. |
| img_shape (Sequence[int]): Shape of input image, |
| [height, width, channel]. |
| with_corner_emb (bool): Generate corner embedding target or not. |
| Defaults to False. |
| with_guiding_shift (bool): Generate guiding shift target or not. |
| Defaults to False. |
| with_centripetal_shift (bool): Generate centripetal shift target or |
| not. Defaults to False. |
| |
| Returns: |
| dict: Ground truth of corner heatmap, corner offset, corner |
| embedding, guiding shift and centripetal shift. Containing the |
| following keys: |
| |
| - topleft_heatmap (Tensor): Ground truth top-left corner |
| heatmap. |
| - bottomright_heatmap (Tensor): Ground truth bottom-right |
| corner heatmap. |
| - topleft_offset (Tensor): Ground truth top-left corner offset. |
| - bottomright_offset (Tensor): Ground truth bottom-right corner |
| offset. |
| - corner_embedding (list[list[list[int]]]): Ground truth corner |
| embedding. Not must have. |
| - topleft_guiding_shift (Tensor): Ground truth top-left corner |
| guiding shift. Not must have. |
| - bottomright_guiding_shift (Tensor): Ground truth bottom-right |
| corner guiding shift. Not must have. |
| - topleft_centripetal_shift (Tensor): Ground truth top-left |
| corner centripetal shift. Not must have. |
| - bottomright_centripetal_shift (Tensor): Ground truth |
| bottom-right corner centripetal shift. Not must have. |
| """ |
| batch_size, _, height, width = feat_shape |
| img_h, img_w = img_shape[:2] |
|
|
| width_ratio = float(width / img_w) |
| height_ratio = float(height / img_h) |
|
|
| gt_tl_heatmap = gt_bboxes[-1].new_zeros( |
| [batch_size, self.num_classes, height, width]) |
| gt_br_heatmap = gt_bboxes[-1].new_zeros( |
| [batch_size, self.num_classes, height, width]) |
| gt_tl_offset = gt_bboxes[-1].new_zeros([batch_size, 2, height, width]) |
| gt_br_offset = gt_bboxes[-1].new_zeros([batch_size, 2, height, width]) |
|
|
| if with_corner_emb: |
| match = [] |
|
|
| |
| if with_guiding_shift: |
| gt_tl_guiding_shift = gt_bboxes[-1].new_zeros( |
| [batch_size, 2, height, width]) |
| gt_br_guiding_shift = gt_bboxes[-1].new_zeros( |
| [batch_size, 2, height, width]) |
| |
| |
| if with_centripetal_shift: |
| gt_tl_centripetal_shift = gt_bboxes[-1].new_zeros( |
| [batch_size, 2, height, width]) |
| gt_br_centripetal_shift = gt_bboxes[-1].new_zeros( |
| [batch_size, 2, height, width]) |
|
|
| for batch_id in range(batch_size): |
| |
| corner_match = [] |
| for box_id in range(len(gt_labels[batch_id])): |
| left, top, right, bottom = gt_bboxes[batch_id][box_id] |
| center_x = (left + right) / 2.0 |
| center_y = (top + bottom) / 2.0 |
| label = gt_labels[batch_id][box_id] |
|
|
| |
| scale_left = left * width_ratio |
| scale_right = right * width_ratio |
| scale_top = top * height_ratio |
| scale_bottom = bottom * height_ratio |
| scale_center_x = center_x * width_ratio |
| scale_center_y = center_y * height_ratio |
|
|
| |
| left_idx = int(min(scale_left, width - 1)) |
| right_idx = int(min(scale_right, width - 1)) |
| top_idx = int(min(scale_top, height - 1)) |
| bottom_idx = int(min(scale_bottom, height - 1)) |
|
|
| |
| scale_box_width = ceil(scale_right - scale_left) |
| scale_box_height = ceil(scale_bottom - scale_top) |
| radius = gaussian_radius((scale_box_height, scale_box_width), |
| min_overlap=0.3) |
| radius = max(0, int(radius)) |
| gt_tl_heatmap[batch_id, label] = gen_gaussian_target( |
| gt_tl_heatmap[batch_id, label], [left_idx, top_idx], |
| radius) |
| gt_br_heatmap[batch_id, label] = gen_gaussian_target( |
| gt_br_heatmap[batch_id, label], [right_idx, bottom_idx], |
| radius) |
|
|
| |
| left_offset = scale_left - left_idx |
| top_offset = scale_top - top_idx |
| right_offset = scale_right - right_idx |
| bottom_offset = scale_bottom - bottom_idx |
| gt_tl_offset[batch_id, 0, top_idx, left_idx] = left_offset |
| gt_tl_offset[batch_id, 1, top_idx, left_idx] = top_offset |
| gt_br_offset[batch_id, 0, bottom_idx, right_idx] = right_offset |
| gt_br_offset[batch_id, 1, bottom_idx, |
| right_idx] = bottom_offset |
|
|
| |
| if with_corner_emb: |
| corner_match.append([[top_idx, left_idx], |
| [bottom_idx, right_idx]]) |
| |
| if with_guiding_shift: |
| gt_tl_guiding_shift[batch_id, 0, top_idx, |
| left_idx] = scale_center_x - left_idx |
| gt_tl_guiding_shift[batch_id, 1, top_idx, |
| left_idx] = scale_center_y - top_idx |
| gt_br_guiding_shift[batch_id, 0, bottom_idx, |
| right_idx] = right_idx - scale_center_x |
| gt_br_guiding_shift[ |
| batch_id, 1, bottom_idx, |
| right_idx] = bottom_idx - scale_center_y |
| |
| if with_centripetal_shift: |
| gt_tl_centripetal_shift[batch_id, 0, top_idx, |
| left_idx] = log(scale_center_x - |
| scale_left) |
| gt_tl_centripetal_shift[batch_id, 1, top_idx, |
| left_idx] = log(scale_center_y - |
| scale_top) |
| gt_br_centripetal_shift[batch_id, 0, bottom_idx, |
| right_idx] = log(scale_right - |
| scale_center_x) |
| gt_br_centripetal_shift[batch_id, 1, bottom_idx, |
| right_idx] = log(scale_bottom - |
| scale_center_y) |
|
|
| if with_corner_emb: |
| match.append(corner_match) |
|
|
| target_result = dict( |
| topleft_heatmap=gt_tl_heatmap, |
| topleft_offset=gt_tl_offset, |
| bottomright_heatmap=gt_br_heatmap, |
| bottomright_offset=gt_br_offset) |
|
|
| if with_corner_emb: |
| target_result.update(corner_embedding=match) |
| if with_guiding_shift: |
| target_result.update( |
| topleft_guiding_shift=gt_tl_guiding_shift, |
| bottomright_guiding_shift=gt_br_guiding_shift) |
| if with_centripetal_shift: |
| target_result.update( |
| topleft_centripetal_shift=gt_tl_centripetal_shift, |
| bottomright_centripetal_shift=gt_br_centripetal_shift) |
|
|
| return target_result |
|
|
| def loss_by_feat( |
| self, |
| tl_heats: List[Tensor], |
| br_heats: List[Tensor], |
| tl_embs: List[Tensor], |
| br_embs: List[Tensor], |
| tl_offs: List[Tensor], |
| br_offs: List[Tensor], |
| batch_gt_instances: InstanceList, |
| batch_img_metas: List[dict], |
| batch_gt_instances_ignore: OptInstanceList = None) -> dict: |
| """Calculate the loss based on the features extracted by the detection |
| head. |
| |
| Args: |
| tl_heats (list[Tensor]): Top-left corner heatmaps for each level |
| with shape (N, num_classes, H, W). |
| br_heats (list[Tensor]): Bottom-right corner heatmaps for each |
| level with shape (N, num_classes, H, W). |
| tl_embs (list[Tensor]): Top-left corner embeddings for each level |
| with shape (N, corner_emb_channels, H, W). |
| br_embs (list[Tensor]): Bottom-right corner embeddings for each |
| level with shape (N, corner_emb_channels, H, W). |
| tl_offs (list[Tensor]): Top-left corner offsets for each level |
| with shape (N, corner_offset_channels, H, W). |
| br_offs (list[Tensor]): Bottom-right corner offsets for each level |
| with shape (N, corner_offset_channels, H, W). |
| batch_gt_instances (list[:obj:`InstanceData`]): Batch of |
| gt_instance. It usually includes ``bboxes`` and ``labels`` |
| attributes. |
| batch_img_metas (list[dict]): Meta information of each image, e.g., |
| image size, scaling factor, etc. |
| batch_gt_instances_ignore (list[:obj:`InstanceData`], optional): |
| Specify which bounding boxes can be ignored when computing |
| the loss. |
| |
| Returns: |
| dict[str, Tensor]: A dictionary of loss components. Containing the |
| following losses: |
| |
| - det_loss (list[Tensor]): Corner keypoint losses of all |
| feature levels. |
| - pull_loss (list[Tensor]): Part one of AssociativeEmbedding |
| losses of all feature levels. |
| - push_loss (list[Tensor]): Part two of AssociativeEmbedding |
| losses of all feature levels. |
| - off_loss (list[Tensor]): Corner offset losses of all feature |
| levels. |
| """ |
| gt_bboxes = [ |
| gt_instances.bboxes for gt_instances in batch_gt_instances |
| ] |
| gt_labels = [ |
| gt_instances.labels for gt_instances in batch_gt_instances |
| ] |
|
|
| targets = self.get_targets( |
| gt_bboxes, |
| gt_labels, |
| tl_heats[-1].shape, |
| batch_img_metas[0]['batch_input_shape'], |
| with_corner_emb=self.with_corner_emb) |
| mlvl_targets = [targets for _ in range(self.num_feat_levels)] |
| det_losses, pull_losses, push_losses, off_losses = multi_apply( |
| self.loss_by_feat_single, tl_heats, br_heats, tl_embs, br_embs, |
| tl_offs, br_offs, mlvl_targets) |
| loss_dict = dict(det_loss=det_losses, off_loss=off_losses) |
| if self.with_corner_emb: |
| loss_dict.update(pull_loss=pull_losses, push_loss=push_losses) |
| return loss_dict |
|
|
| def loss_by_feat_single(self, tl_hmp: Tensor, br_hmp: Tensor, |
| tl_emb: Optional[Tensor], br_emb: Optional[Tensor], |
| tl_off: Tensor, br_off: Tensor, |
| targets: dict) -> Tuple[Tensor, ...]: |
| """Calculate the loss of a single scale level based on the features |
| extracted by the detection head. |
| |
| Args: |
| tl_hmp (Tensor): Top-left corner heatmap for current level with |
| shape (N, num_classes, H, W). |
| br_hmp (Tensor): Bottom-right corner heatmap for current level with |
| shape (N, num_classes, H, W). |
| tl_emb (Tensor, optional): Top-left corner embedding for current |
| level with shape (N, corner_emb_channels, H, W). |
| br_emb (Tensor, optional): Bottom-right corner embedding for |
| current level with shape (N, corner_emb_channels, H, W). |
| tl_off (Tensor): Top-left corner offset for current level with |
| shape (N, corner_offset_channels, H, W). |
| br_off (Tensor): Bottom-right corner offset for current level with |
| shape (N, corner_offset_channels, H, W). |
| targets (dict): Corner target generated by `get_targets`. |
| |
| Returns: |
| tuple[torch.Tensor]: Losses of the head's different branches |
| containing the following losses: |
| |
| - det_loss (Tensor): Corner keypoint loss. |
| - pull_loss (Tensor): Part one of AssociativeEmbedding loss. |
| - push_loss (Tensor): Part two of AssociativeEmbedding loss. |
| - off_loss (Tensor): Corner offset loss. |
| """ |
| gt_tl_hmp = targets['topleft_heatmap'] |
| gt_br_hmp = targets['bottomright_heatmap'] |
| gt_tl_off = targets['topleft_offset'] |
| gt_br_off = targets['bottomright_offset'] |
| gt_embedding = targets['corner_embedding'] |
|
|
| |
| tl_det_loss = self.loss_heatmap( |
| tl_hmp.sigmoid(), |
| gt_tl_hmp, |
| avg_factor=max(1, gt_tl_hmp.eq(1).sum())) |
| br_det_loss = self.loss_heatmap( |
| br_hmp.sigmoid(), |
| gt_br_hmp, |
| avg_factor=max(1, gt_br_hmp.eq(1).sum())) |
| det_loss = (tl_det_loss + br_det_loss) / 2.0 |
|
|
| |
| if self.with_corner_emb and self.loss_embedding is not None: |
| pull_loss, push_loss = self.loss_embedding(tl_emb, br_emb, |
| gt_embedding) |
| else: |
| pull_loss, push_loss = None, None |
|
|
| |
| |
| |
| |
| |
| tl_off_mask = gt_tl_hmp.eq(1).sum(1).gt(0).unsqueeze(1).type_as( |
| gt_tl_hmp) |
| br_off_mask = gt_br_hmp.eq(1).sum(1).gt(0).unsqueeze(1).type_as( |
| gt_br_hmp) |
| tl_off_loss = self.loss_offset( |
| tl_off, |
| gt_tl_off, |
| tl_off_mask, |
| avg_factor=max(1, tl_off_mask.sum())) |
| br_off_loss = self.loss_offset( |
| br_off, |
| gt_br_off, |
| br_off_mask, |
| avg_factor=max(1, br_off_mask.sum())) |
|
|
| off_loss = (tl_off_loss + br_off_loss) / 2.0 |
|
|
| return det_loss, pull_loss, push_loss, off_loss |
|
|
| def predict_by_feat(self, |
| tl_heats: List[Tensor], |
| br_heats: List[Tensor], |
| tl_embs: List[Tensor], |
| br_embs: List[Tensor], |
| tl_offs: List[Tensor], |
| br_offs: List[Tensor], |
| batch_img_metas: Optional[List[dict]] = None, |
| rescale: bool = False, |
| with_nms: bool = True) -> InstanceList: |
| """Transform a batch of output features extracted from the head into |
| bbox results. |
| |
| Args: |
| tl_heats (list[Tensor]): Top-left corner heatmaps for each level |
| with shape (N, num_classes, H, W). |
| br_heats (list[Tensor]): Bottom-right corner heatmaps for each |
| level with shape (N, num_classes, H, W). |
| tl_embs (list[Tensor]): Top-left corner embeddings for each level |
| with shape (N, corner_emb_channels, H, W). |
| br_embs (list[Tensor]): Bottom-right corner embeddings for each |
| level with shape (N, corner_emb_channels, H, W). |
| tl_offs (list[Tensor]): Top-left corner offsets for each level |
| with shape (N, corner_offset_channels, H, W). |
| br_offs (list[Tensor]): Bottom-right corner offsets for each level |
| with shape (N, corner_offset_channels, H, W). |
| batch_img_metas (list[dict], optional): Batch image meta info. |
| Defaults to None. |
| rescale (bool): If True, return boxes in original image space. |
| Defaults to False. |
| with_nms (bool): If True, do nms before return boxes. |
| Defaults to True. |
| |
| Returns: |
| list[:obj:`InstanceData`]: Object detection results of each image |
| after the post process. Each item usually contains following keys. |
| |
| - scores (Tensor): Classification scores, has a shape |
| (num_instance, ) |
| - labels (Tensor): Labels of bboxes, has a shape |
| (num_instances, ). |
| - bboxes (Tensor): Has a shape (num_instances, 4), |
| the last dimension 4 arrange as (x1, y1, x2, y2). |
| """ |
| assert tl_heats[-1].shape[0] == br_heats[-1].shape[0] == len( |
| batch_img_metas) |
| result_list = [] |
| for img_id in range(len(batch_img_metas)): |
| result_list.append( |
| self._predict_by_feat_single( |
| tl_heats[-1][img_id:img_id + 1, :], |
| br_heats[-1][img_id:img_id + 1, :], |
| tl_offs[-1][img_id:img_id + 1, :], |
| br_offs[-1][img_id:img_id + 1, :], |
| batch_img_metas[img_id], |
| tl_emb=tl_embs[-1][img_id:img_id + 1, :], |
| br_emb=br_embs[-1][img_id:img_id + 1, :], |
| rescale=rescale, |
| with_nms=with_nms)) |
|
|
| return result_list |
|
|
| def _predict_by_feat_single(self, |
| tl_heat: Tensor, |
| br_heat: Tensor, |
| tl_off: Tensor, |
| br_off: Tensor, |
| img_meta: dict, |
| tl_emb: Optional[Tensor] = None, |
| br_emb: Optional[Tensor] = None, |
| tl_centripetal_shift: Optional[Tensor] = None, |
| br_centripetal_shift: Optional[Tensor] = None, |
| rescale: bool = False, |
| with_nms: bool = True) -> InstanceData: |
| """Transform a single image's features extracted from the head into |
| bbox results. |
| |
| Args: |
| tl_heat (Tensor): Top-left corner heatmap for current level with |
| shape (N, num_classes, H, W). |
| br_heat (Tensor): Bottom-right corner heatmap for current level |
| with shape (N, num_classes, H, W). |
| tl_off (Tensor): Top-left corner offset for current level with |
| shape (N, corner_offset_channels, H, W). |
| br_off (Tensor): Bottom-right corner offset for current level with |
| shape (N, corner_offset_channels, H, W). |
| img_meta (dict): Meta information of current image, e.g., |
| image size, scaling factor, etc. |
| tl_emb (Tensor): Top-left corner embedding for current level with |
| shape (N, corner_emb_channels, H, W). |
| br_emb (Tensor): Bottom-right corner embedding for current level |
| with shape (N, corner_emb_channels, H, W). |
| tl_centripetal_shift: Top-left corner's centripetal shift for |
| current level with shape (N, 2, H, W). |
| br_centripetal_shift: Bottom-right corner's centripetal shift for |
| current level with shape (N, 2, H, W). |
| rescale (bool): If True, return boxes in original image space. |
| Defaults to False. |
| with_nms (bool): If True, do nms before return boxes. |
| Defaults to True. |
| |
| Returns: |
| :obj:`InstanceData`: Detection results of each image |
| after the post process. |
| Each item usually contains following keys. |
| |
| - scores (Tensor): Classification scores, has a shape |
| (num_instance, ) |
| - labels (Tensor): Labels of bboxes, has a shape |
| (num_instances, ). |
| - bboxes (Tensor): Has a shape (num_instances, 4), |
| the last dimension 4 arrange as (x1, y1, x2, y2). |
| """ |
| if isinstance(img_meta, (list, tuple)): |
| img_meta = img_meta[0] |
|
|
| batch_bboxes, batch_scores, batch_clses = self._decode_heatmap( |
| tl_heat=tl_heat.sigmoid(), |
| br_heat=br_heat.sigmoid(), |
| tl_off=tl_off, |
| br_off=br_off, |
| tl_emb=tl_emb, |
| br_emb=br_emb, |
| tl_centripetal_shift=tl_centripetal_shift, |
| br_centripetal_shift=br_centripetal_shift, |
| img_meta=img_meta, |
| k=self.test_cfg.corner_topk, |
| kernel=self.test_cfg.local_maximum_kernel, |
| distance_threshold=self.test_cfg.distance_threshold) |
|
|
| if rescale and 'scale_factor' in img_meta: |
| batch_bboxes /= batch_bboxes.new_tensor( |
| img_meta['scale_factor']).repeat((1, 2)) |
|
|
| bboxes = batch_bboxes.view([-1, 4]) |
| scores = batch_scores.view(-1) |
| clses = batch_clses.view(-1) |
|
|
| det_bboxes = torch.cat([bboxes, scores.unsqueeze(-1)], -1) |
| keepinds = (det_bboxes[:, -1] > -0.1) |
| det_bboxes = det_bboxes[keepinds] |
| det_labels = clses[keepinds] |
|
|
| if with_nms: |
| det_bboxes, det_labels = self._bboxes_nms(det_bboxes, det_labels, |
| self.test_cfg) |
|
|
| results = InstanceData() |
| results.bboxes = det_bboxes[..., :4] |
| results.scores = det_bboxes[..., 4] |
| results.labels = det_labels |
| return results |
|
|
| def _bboxes_nms(self, bboxes: Tensor, labels: Tensor, |
| cfg: ConfigDict) -> Tuple[Tensor, Tensor]: |
| """bboxes nms.""" |
| if 'nms_cfg' in cfg: |
| warning.warn('nms_cfg in test_cfg will be deprecated. ' |
| 'Please rename it as nms') |
| if 'nms' not in cfg: |
| cfg.nms = cfg.nms_cfg |
|
|
| if labels.numel() > 0: |
| max_num = cfg.max_per_img |
| bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:, |
| -1].contiguous(), |
| labels, cfg.nms) |
| if max_num > 0: |
| bboxes = bboxes[:max_num] |
| labels = labels[keep][:max_num] |
|
|
| return bboxes, labels |
|
|
| def _decode_heatmap(self, |
| tl_heat: Tensor, |
| br_heat: Tensor, |
| tl_off: Tensor, |
| br_off: Tensor, |
| tl_emb: Optional[Tensor] = None, |
| br_emb: Optional[Tensor] = None, |
| tl_centripetal_shift: Optional[Tensor] = None, |
| br_centripetal_shift: Optional[Tensor] = None, |
| img_meta: Optional[dict] = None, |
| k: int = 100, |
| kernel: int = 3, |
| distance_threshold: float = 0.5, |
| num_dets: int = 1000) -> Tuple[Tensor, Tensor, Tensor]: |
| """Transform outputs into detections raw bbox prediction. |
| |
| Args: |
| tl_heat (Tensor): Top-left corner heatmap for current level with |
| shape (N, num_classes, H, W). |
| br_heat (Tensor): Bottom-right corner heatmap for current level |
| with shape (N, num_classes, H, W). |
| tl_off (Tensor): Top-left corner offset for current level with |
| shape (N, corner_offset_channels, H, W). |
| br_off (Tensor): Bottom-right corner offset for current level with |
| shape (N, corner_offset_channels, H, W). |
| tl_emb (Tensor, Optional): Top-left corner embedding for current |
| level with shape (N, corner_emb_channels, H, W). |
| br_emb (Tensor, Optional): Bottom-right corner embedding for |
| current level with shape (N, corner_emb_channels, H, W). |
| tl_centripetal_shift (Tensor, Optional): Top-left centripetal shift |
| for current level with shape (N, 2, H, W). |
| br_centripetal_shift (Tensor, Optional): Bottom-right centripetal |
| shift for current level with shape (N, 2, H, W). |
| img_meta (dict): Meta information of current image, e.g., |
| image size, scaling factor, etc. |
| k (int): Get top k corner keypoints from heatmap. |
| kernel (int): Max pooling kernel for extract local maximum pixels. |
| distance_threshold (float): Distance threshold. Top-left and |
| bottom-right corner keypoints with feature distance less than |
| the threshold will be regarded as keypoints from same object. |
| num_dets (int): Num of raw boxes before doing nms. |
| |
| Returns: |
| tuple[torch.Tensor]: Decoded output of CornerHead, containing the |
| following Tensors: |
| |
| - bboxes (Tensor): Coords of each box. |
| - scores (Tensor): Scores of each box. |
| - clses (Tensor): Categories of each box. |
| """ |
| with_embedding = tl_emb is not None and br_emb is not None |
| with_centripetal_shift = ( |
| tl_centripetal_shift is not None |
| and br_centripetal_shift is not None) |
| assert with_embedding + with_centripetal_shift == 1 |
| batch, _, height, width = tl_heat.size() |
| if torch.onnx.is_in_onnx_export(): |
| inp_h, inp_w = img_meta['pad_shape_for_onnx'][:2] |
| else: |
| inp_h, inp_w = img_meta['batch_input_shape'][:2] |
|
|
| |
| tl_heat = get_local_maximum(tl_heat, kernel=kernel) |
| br_heat = get_local_maximum(br_heat, kernel=kernel) |
|
|
| tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = get_topk_from_heatmap( |
| tl_heat, k=k) |
| br_scores, br_inds, br_clses, br_ys, br_xs = get_topk_from_heatmap( |
| br_heat, k=k) |
|
|
| |
| |
| |
| |
| tl_ys = tl_ys.view(batch, k, 1).repeat(1, 1, k) |
| tl_xs = tl_xs.view(batch, k, 1).repeat(1, 1, k) |
| br_ys = br_ys.view(batch, 1, k).repeat(1, k, 1) |
| br_xs = br_xs.view(batch, 1, k).repeat(1, k, 1) |
|
|
| tl_off = transpose_and_gather_feat(tl_off, tl_inds) |
| tl_off = tl_off.view(batch, k, 1, 2) |
| br_off = transpose_and_gather_feat(br_off, br_inds) |
| br_off = br_off.view(batch, 1, k, 2) |
|
|
| tl_xs = tl_xs + tl_off[..., 0] |
| tl_ys = tl_ys + tl_off[..., 1] |
| br_xs = br_xs + br_off[..., 0] |
| br_ys = br_ys + br_off[..., 1] |
|
|
| if with_centripetal_shift: |
| tl_centripetal_shift = transpose_and_gather_feat( |
| tl_centripetal_shift, tl_inds).view(batch, k, 1, 2).exp() |
| br_centripetal_shift = transpose_and_gather_feat( |
| br_centripetal_shift, br_inds).view(batch, 1, k, 2).exp() |
|
|
| tl_ctxs = tl_xs + tl_centripetal_shift[..., 0] |
| tl_ctys = tl_ys + tl_centripetal_shift[..., 1] |
| br_ctxs = br_xs - br_centripetal_shift[..., 0] |
| br_ctys = br_ys - br_centripetal_shift[..., 1] |
|
|
| |
| tl_xs *= (inp_w / width) |
| tl_ys *= (inp_h / height) |
| br_xs *= (inp_w / width) |
| br_ys *= (inp_h / height) |
|
|
| if with_centripetal_shift: |
| tl_ctxs *= (inp_w / width) |
| tl_ctys *= (inp_h / height) |
| br_ctxs *= (inp_w / width) |
| br_ctys *= (inp_h / height) |
|
|
| x_off, y_off = 0, 0 |
| if not torch.onnx.is_in_onnx_export(): |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| if 'border' in img_meta: |
| x_off = img_meta['border'][2] |
| y_off = img_meta['border'][0] |
|
|
| tl_xs -= x_off |
| tl_ys -= y_off |
| br_xs -= x_off |
| br_ys -= y_off |
|
|
| zeros = tl_xs.new_zeros(*tl_xs.size()) |
| tl_xs = torch.where(tl_xs > 0.0, tl_xs, zeros) |
| tl_ys = torch.where(tl_ys > 0.0, tl_ys, zeros) |
| br_xs = torch.where(br_xs > 0.0, br_xs, zeros) |
| br_ys = torch.where(br_ys > 0.0, br_ys, zeros) |
|
|
| bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) |
| area_bboxes = ((br_xs - tl_xs) * (br_ys - tl_ys)).abs() |
|
|
| if with_centripetal_shift: |
| tl_ctxs -= x_off |
| tl_ctys -= y_off |
| br_ctxs -= x_off |
| br_ctys -= y_off |
|
|
| tl_ctxs *= tl_ctxs.gt(0.0).type_as(tl_ctxs) |
| tl_ctys *= tl_ctys.gt(0.0).type_as(tl_ctys) |
| br_ctxs *= br_ctxs.gt(0.0).type_as(br_ctxs) |
| br_ctys *= br_ctys.gt(0.0).type_as(br_ctys) |
|
|
| ct_bboxes = torch.stack((tl_ctxs, tl_ctys, br_ctxs, br_ctys), |
| dim=3) |
| area_ct_bboxes = ((br_ctxs - tl_ctxs) * (br_ctys - tl_ctys)).abs() |
|
|
| rcentral = torch.zeros_like(ct_bboxes) |
| |
| mu = torch.ones_like(area_bboxes) / 2.4 |
| mu[area_bboxes > 3500] = 1 / 2.1 |
|
|
| bboxes_center_x = (bboxes[..., 0] + bboxes[..., 2]) / 2 |
| bboxes_center_y = (bboxes[..., 1] + bboxes[..., 3]) / 2 |
| rcentral[..., 0] = bboxes_center_x - mu * (bboxes[..., 2] - |
| bboxes[..., 0]) / 2 |
| rcentral[..., 1] = bboxes_center_y - mu * (bboxes[..., 3] - |
| bboxes[..., 1]) / 2 |
| rcentral[..., 2] = bboxes_center_x + mu * (bboxes[..., 2] - |
| bboxes[..., 0]) / 2 |
| rcentral[..., 3] = bboxes_center_y + mu * (bboxes[..., 3] - |
| bboxes[..., 1]) / 2 |
| area_rcentral = ((rcentral[..., 2] - rcentral[..., 0]) * |
| (rcentral[..., 3] - rcentral[..., 1])).abs() |
| dists = area_ct_bboxes / area_rcentral |
|
|
| tl_ctx_inds = (ct_bboxes[..., 0] <= rcentral[..., 0]) | ( |
| ct_bboxes[..., 0] >= rcentral[..., 2]) |
| tl_cty_inds = (ct_bboxes[..., 1] <= rcentral[..., 1]) | ( |
| ct_bboxes[..., 1] >= rcentral[..., 3]) |
| br_ctx_inds = (ct_bboxes[..., 2] <= rcentral[..., 0]) | ( |
| ct_bboxes[..., 2] >= rcentral[..., 2]) |
| br_cty_inds = (ct_bboxes[..., 3] <= rcentral[..., 1]) | ( |
| ct_bboxes[..., 3] >= rcentral[..., 3]) |
|
|
| if with_embedding: |
| tl_emb = transpose_and_gather_feat(tl_emb, tl_inds) |
| tl_emb = tl_emb.view(batch, k, 1) |
| br_emb = transpose_and_gather_feat(br_emb, br_inds) |
| br_emb = br_emb.view(batch, 1, k) |
| dists = torch.abs(tl_emb - br_emb) |
|
|
| tl_scores = tl_scores.view(batch, k, 1).repeat(1, 1, k) |
| br_scores = br_scores.view(batch, 1, k).repeat(1, k, 1) |
|
|
| scores = (tl_scores + br_scores) / 2 |
|
|
| |
| tl_clses = tl_clses.view(batch, k, 1).repeat(1, 1, k) |
| br_clses = br_clses.view(batch, 1, k).repeat(1, k, 1) |
| cls_inds = (tl_clses != br_clses) |
|
|
| |
| dist_inds = dists > distance_threshold |
|
|
| |
| width_inds = (br_xs <= tl_xs) |
| height_inds = (br_ys <= tl_ys) |
|
|
| |
| |
| |
| |
| negative_scores = -1 * torch.ones_like(scores) |
| scores = torch.where(cls_inds, negative_scores, scores) |
| scores = torch.where(width_inds, negative_scores, scores) |
| scores = torch.where(height_inds, negative_scores, scores) |
| scores = torch.where(dist_inds, negative_scores, scores) |
|
|
| if with_centripetal_shift: |
| scores[tl_ctx_inds] = -1 |
| scores[tl_cty_inds] = -1 |
| scores[br_ctx_inds] = -1 |
| scores[br_cty_inds] = -1 |
|
|
| scores = scores.view(batch, -1) |
| scores, inds = torch.topk(scores, num_dets) |
| scores = scores.unsqueeze(2) |
|
|
| bboxes = bboxes.view(batch, -1, 4) |
| bboxes = gather_feat(bboxes, inds) |
|
|
| clses = tl_clses.contiguous().view(batch, -1, 1) |
| clses = gather_feat(clses, inds) |
|
|
| return bboxes, scores, clses |
|
|