model_fall
/
PaddleDetection-release-2.6
/ppdet
/modeling
/assigners
/rotated_task_aligned_assigner.py
| # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import paddle | |
| import paddle.nn as nn | |
| import paddle.nn.functional as F | |
| from ppdet.core.workspace import register | |
| from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes | |
| from .utils import gather_topk_anchors, compute_max_iou_anchor | |
| __all__ = ['RotatedTaskAlignedAssigner'] | |
class RotatedTaskAlignedAssigner(nn.Layer):
    """Task-aligned label assigner for rotated boxes.

    Follows TOOD (Task-aligned One-stage Object Detection): every ground
    truth is matched to anchors by ranking the alignment metric
    ``cls_score ** alpha * iou ** beta`` computed against each predicted
    rotated box.

    Args:
        topk (int): number of candidates per gt, forwarded to
            ``gather_topk_anchors``.
        alpha (float): exponent applied to the gathered class score.
        beta (float): exponent applied to the rotated IoU.
        eps (float): tolerance used when discarding IoUs above 1 and added
            to the denominator when the metric is normalized.
    """

    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
        super().__init__()
        self.topk = topk
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    def forward(self,
                pred_scores,
                pred_bboxes,
                anchor_points,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                pad_gt_mask,
                bg_index,
                gt_scores=None):
        r"""Produce a label, a target box and a soft score for every anchor.

        Adapted from
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

        Outline:
        1. build the alignment metric between every predicted box (all
           pyramid levels) and every gt
        2. keep the top-k predictions per gt as candidates
        3. keep only candidates whose anchor center lies inside the gt
           (an anchor-free head can only regress positive distances)
        4. when one anchor is claimed by several gts, keep the gt with the
           highest IoU

        Args:
            pred_scores (Tensor, float32): class probabilities, shape(B, L, C)
            pred_bboxes (Tensor, float32): predicted boxes, shape(B, L, 5)
            anchor_points (Tensor, float32): anchor centers, shape(1, L, 2),
                "cxcy" format
            num_anchors_list (List): anchors per level; not read by this
                implementation
            gt_labels (Tensor, int64|int32): gt class ids, shape(B, n, 1)
            gt_bboxes (Tensor, float32): gt boxes, shape(B, n, 5)
            pad_gt_mask (Tensor, float32): 1 for a real gt, 0 for padding,
                shape(B, n, 1)
            bg_index (int): label used for background anchors
            gt_scores (Tensor|None, float32): gt scores, shape(B, n, 1); not
                read by this implementation
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_bboxes (Tensor): (B, L, 5)
            assigned_scores (Tensor): (B, L, C)
        """
        assert pred_scores.ndim == pred_bboxes.ndim
        assert gt_labels.ndim == gt_bboxes.ndim
        assert gt_bboxes.ndim == 3

        n_batch, n_anchors, n_classes = pred_scores.shape
        n_gts = gt_bboxes.shape[1]

        if n_gts == 0:
            # Batch without any gt: everything is background, targets are zero.
            bg_labels = paddle.full(
                [n_batch, n_anchors], bg_index, dtype=gt_labels.dtype)
            zero_bboxes = paddle.zeros([n_batch, n_anchors, 5])
            zero_scores = paddle.zeros([n_batch, n_anchors, n_classes])
            return bg_labels, zero_bboxes, zero_scores

        # Rotated IoU of each gt against each prediction, [B, n, L].
        # Values above 1 (beyond eps) are treated as invalid and zeroed.
        pairwise_ious = rotated_iou_similarity(gt_bboxes, pred_bboxes)
        out_of_range = pairwise_ious > 1 + self.eps
        ious = paddle.where(out_of_range,
                            paddle.zeros_like(pairwise_ious), pairwise_ious)
        ious.stop_gradient = True

        # For every gt pick the predicted score of its own class, [B, n, L].
        class_major_scores = pred_scores.transpose([0, 2, 1])
        batch_ind = paddle.arange(
            end=n_batch, dtype=gt_labels.dtype).unsqueeze(-1)
        batch_col = batch_ind.tile([1, n_gts])
        label_col = gt_labels.squeeze(-1)
        gather_index = paddle.stack([batch_col, label_col], axis=-1)
        gt_cls_scores = paddle.gather_nd(class_major_scores, gather_index)

        # Alignment metric, [B, n, L].
        score_term = gt_cls_scores.pow(self.alpha)
        iou_term = ious.pow(self.beta)
        align_metric = score_term * iou_term

        # Anchors whose center falls inside each gt, [B, n, L].
        inside_gt = check_points_in_rotated_boxes(anchor_points, gt_bboxes)

        # Top-k candidates per gt among the anchors inside it, [B, n, L].
        in_topk = gather_topk_anchors(
            align_metric * inside_gt, self.topk, topk_mask=pad_gt_mask)

        # Positive (gt, anchor) pairs, padding rows removed, [B, n, L].
        pos_mask = in_topk * inside_gt * pad_gt_mask

        # Resolve anchors claimed by more than one gt via the highest IoU.
        gts_per_anchor = pos_mask.sum(axis=-2)
        if gts_per_anchor.max() > 1:
            contested = (gts_per_anchor.unsqueeze(1) > 1).tile([1, n_gts, 1])
            best_iou_mask = compute_max_iou_anchor(ious)
            pos_mask = paddle.where(contested, best_iou_mask, pos_mask)
            gts_per_anchor = pos_mask.sum(axis=-2)

        # Index of the matched gt in the flattened (B * n) gt arrays.
        matched_gt = pos_mask.argmax(axis=-2) + batch_ind * n_gts
        flat_gt_index = matched_gt.flatten()

        # Labels: matched gt label where positive, background elsewhere.
        gathered_labels = paddle.gather(
            gt_labels.flatten(), flat_gt_index, axis=0)
        gathered_labels = gathered_labels.reshape([n_batch, n_anchors])
        assigned_labels = paddle.where(
            gts_per_anchor > 0, gathered_labels,
            paddle.full_like(gathered_labels, bg_index))

        # Boxes of the matched gt.
        gathered_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 5]), flat_gt_index, axis=0)
        assigned_bboxes = gathered_bboxes.reshape([n_batch, n_anchors, 5])

        # One-hot over C + 1 classes, then drop the background column.
        one_hot_labels = F.one_hot(assigned_labels, n_classes + 1)
        kept_classes = list(range(n_classes + 1))
        kept_classes.remove(bg_index)
        assigned_scores = paddle.index_select(
            one_hot_labels, paddle.to_tensor(kept_classes), axis=-1)

        # Rescale the metric per gt so its maximum equals that gt's best
        # positive IoU, then use the per-anchor maximum as the soft score.
        align_metric *= pos_mask
        peak_metric = align_metric.max(axis=-1, keepdim=True)
        peak_iou = (ious * pos_mask).max(axis=-1, keepdim=True)
        align_metric = align_metric / (peak_metric + self.eps) * peak_iou
        anchor_weight = align_metric.max(axis=-2).unsqueeze(-1)
        assigned_scores = assigned_scores * anchor_weight

        # Targets must not propagate gradients.
        for target in (assigned_bboxes, assigned_scores, assigned_labels):
            target.stop_gradient = True
        return assigned_labels, assigned_bboxes, assigned_scores