| |
| import logging |
| import numpy as np |
| import torch |
|
|
| from detectron2.config import configurable |
| from detectron2.layers import ShapeSpec, batched_nms_rotated |
| from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated |
| from detectron2.utils.events import get_event_storage |
|
|
| from ..box_regression import Box2BoxTransformRotated |
| from ..poolers import ROIPooler |
| from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals |
| from .box_head import build_box_head |
| from .fast_rcnn import FastRCNNOutputLayers |
| from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads |
|
|
| logger = logging.getLogger(__name__) |
|
|
| """ |
| Shape shorthand in this module: |
| |
| N: number of images in the minibatch |
| R: number of ROIs, combined over all images, in the minibatch |
| Ri: number of ROIs in image i |
| K: number of foreground classes. E.g., there are 80 foreground classes in COCO. |
| |
| Naming convention: |
| |
| deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box |
| transform (see :class:`box_regression.Box2BoxTransformRotated`). |
| |
| pred_class_logits: predicted class scores in [-inf, +inf]; use |
| softmax(pred_class_logits) to estimate P(class). |
| |
| gt_classes: ground-truth classification labels in [0, K], where [0, K) represent |
| foreground object classes and K represents the background class. |
| |
| pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals |
| to detection box predictions. |
| |
| gt_proposal_deltas: ground-truth rotated box2box transform deltas |
| """ |
|
|
|
|
def fast_rcnn_inference_rotated(
    boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image
):
    """
    Run `fast_rcnn_inference_single_image_rotated` on every image of a batch.

    Args:
        boxes (list[Tensor]): Per-image predicted boxes. Element i has shape
            (Ri, K * 5) for class-specific regression or (Ri, 5) for
            class-agnostic regression, where Ri is the number of predictions
            for image i. Compatible with the output of
            :meth:`FastRCNNOutputLayers.predict_boxes`.
        scores (list[Tensor]): Per-image predicted class scores. Element i has
            shape (Ri, K + 1). Compatible with the output of
            :meth:`FastRCNNOutputLayers.predict_probs`.
        image_shapes (list[tuple]): One size tuple per image, forwarded as-is
            to the single-image routine.
            NOTE(review): previously documented as (width, height); the values
            come from `Instances.image_size` at the call site -- confirm the
            actual ordering.
        score_thresh (float): Only detections whose score exceeds this value
            are returned.
        nms_thresh (float): Threshold for box non-maximum suppression, in [0, 1].
        topk_per_image (int): Maximum number of detections kept per image.
            A negative value keeps all detections.

    Returns:
        instances: (list[Instances]): One `Instances` per image holding the
            top-scoring detections.
        kept_indices: (list[Tensor]): One 1D tensor per image; each entry is
            the index in [0, Ri) of the input row a detection came from.

    The lists are walked in lockstep with `zip`, so the output length equals
    the length of the shortest input list.
    """
    instances_per_image = []
    kept_indices_per_image = []
    for img_boxes, img_scores, img_shape in zip(boxes, scores, image_shapes):
        detections, kept = fast_rcnn_inference_single_image_rotated(
            img_boxes, img_scores, img_shape, score_thresh, nms_thresh, topk_per_image
        )
        instances_per_image.append(detections)
        kept_indices_per_image.append(kept)
    return instances_per_image, kept_indices_per_image
|
|
|
|
@torch.no_grad()
def fast_rcnn_inference_single_image_rotated(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return rotated bounding-box detection results by thresholding
    on scores and applying rotated non-maximum suppression (Rotated NMS).

    Args:
        Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference_rotated`, but for only one image. The returned
        index tensor always refers to rows of the `boxes`/`scores` tensors that were
        passed in, including when rows containing non-finite values were discarded
        before thresholding.
    """
    # Discard predictions that contain NaN/Inf in either the box or the scores.
    # Remember which input row each surviving prediction came from so that the
    # indices returned at the end still address the caller's tensors.
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    valid_inds = None
    if not valid_mask.all():
        valid_inds = valid_mask.nonzero().squeeze(1)
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    B = 5  # number of values per rotated box
    # The last score column is the background class; it never yields a detection.
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // B

    # Wrap in RotatedBoxes to use its `clip` method, then restore a
    # (rows, regression classes, B) layout.
    boxes = RotatedBoxes(boxes.reshape(-1, B))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B)

    # Keep (row, class) pairs whose score exceeds the threshold.
    filter_mask = scores > score_thresh
    # Two columns: row index of the prediction, then class index.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        # Class-agnostic regression: every class shares the single box of its row.
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Rotated NMS, batched by class index.
    keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = RotatedBoxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]

    # Translate row indices of the filtered tensors back to rows of the input.
    kept_inds = filter_inds[:, 0]
    if valid_inds is not None:
        kept_inds = valid_inds[kept_inds]
    return result, kept_inds
|
|
|
|
class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers):
    """
    Output layers for Rotated Fast R-CNN.

    Inherits everything from `FastRCNNOutputLayers`, substituting a
    `Box2BoxTransformRotated` for the box transform and running the rotated
    inference routine at test time.
    """

    @classmethod
    def from_config(cls, cfg, input_shape):
        """
        Build constructor arguments, overriding the box transform with the
        rotated one weighted by `cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS`.
        """
        init_kwargs = super().from_config(cfg, input_shape)
        regression_weights = cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS
        init_kwargs["box2box_transform"] = Box2BoxTransformRotated(weights=regression_weights)
        return init_kwargs

    def inference(self, predictions, proposals):
        """
        Returns:
            list[Instances]: same as `fast_rcnn_inference_rotated`.
            list[Tensor]: same as `fast_rcnn_inference_rotated`.
        """
        pred_boxes = self.predict_boxes(predictions, proposals)
        pred_probs = self.predict_probs(predictions, proposals)
        sizes = [per_image.image_size for per_image in proposals]

        # Test-time thresholds stored on the instance, in the positional order
        # expected by `fast_rcnn_inference_rotated`.
        test_settings = (
            self.test_score_thresh,
            self.test_nms_thresh,
            self.test_topk_per_image,
        )
        return fast_rcnn_inference_rotated(pred_boxes, pred_probs, sizes, *test_settings)
|
|
|
|
@ROI_HEADS_REGISTRY.register()
class RROIHeads(StandardROIHeads):
    """
    ROI heads used by Rotated Fast R-CNN to detect rotated boxes.
    Only box prediction is supported; enabling the mask or keypoint branch
    trips an assertion in the constructor.
    """

    @configurable
    def __init__(self, **kwargs):
        """
        NOTE: this interface is experimental.

        Args:
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        has_unsupported_branch = self.mask_on or self.keypoint_on
        assert not has_unsupported_branch, "Mask/Keypoints not supported in Rotated ROIHeads."
        assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!"

    @classmethod
    def _init_box_head(cls, cfg, input_shape):
        """
        Build the box pooler, box head and box predictor from the config.

        Args:
            cfg: config node; reads `MODEL.ROI_HEADS.IN_FEATURES` and the
                `MODEL.ROI_BOX_HEAD.POOLER_*` entries.
            input_shape: mapping from feature name to an object exposing
                `stride` and `channels`.

        Returns:
            dict: entries "box_in_features", "box_pooler", "box_head" and
            "box_predictor".
        """
        feature_names = cfg.MODEL.ROI_HEADS.IN_FEATURES
        box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
        resolution = box_head_cfg.POOLER_RESOLUTION
        # Each feature map's scale is the reciprocal of its stride.
        scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
        sampling_ratio = box_head_cfg.POOLER_SAMPLING_RATIO
        pooler_type = box_head_cfg.POOLER_TYPE

        # Rotated ROI align is the only pooling type accepted here.
        assert pooler_type == "ROIAlignRotated", pooler_type

        # The box head is sized from the first listed feature's channel count.
        channels_per_feature = [input_shape[name].channels for name in feature_names]
        head_in_channels = channels_per_feature[0]

        pooler = ROIPooler(
            output_size=resolution,
            scales=scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        head_input = ShapeSpec(channels=head_in_channels, height=resolution, width=resolution)
        head = build_box_head(cfg, head_input)
        predictor = RotatedFastRCNNOutputLayers(cfg, head.output_shape)

        return {
            "box_in_features": feature_names,
            "box_pooler": pooler,
            "box_head": head,
            "box_predictor": predictor,
        }

    @torch.no_grad()
    def label_and_sample_proposals(self, proposals, targets):
        """
        Prepare proposals for training the RROI heads.

        For each image, proposals are matched to ground truth with rotated IoU
        and `self.proposal_matcher`, then labelled and subsampled by
        `self._sample_proposals`. When `self.proposal_append_gt` is set, the
        ground-truth boxes are first added to the proposals. The mean number
        of foreground and background samples per image is written to the
        current event storage.

        Args:
            See :meth:`StandardROIHeads.forward`

        Returns:
            list[Instances]: length `N` list of `Instances` containing the
            proposals sampled for training. Each `Instances` has these fields:
                - proposal_boxes: the rotated proposal boxes
                - gt_boxes: the ground-truth rotated box each proposal was
                  matched to. Only set when the image has at least one
                  ground-truth instance; presumably not meaningful for
                  proposals labelled as background.
                - gt_classes: the ground-truth classification label for each
                  proposal, where `self.num_classes` denotes background.
        """
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(targets, proposals)

        sampled_proposals = []
        fg_counts = []
        bg_counts = []
        for image_proposals, image_targets in zip(proposals, targets):
            image_has_gt = len(image_targets) > 0
            iou_matrix = pairwise_iou_rotated(
                image_targets.gt_boxes, image_proposals.proposal_boxes
            )
            matched_idxs, matched_labels = self.proposal_matcher(iou_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, image_targets.gt_classes
            )

            image_proposals = image_proposals[sampled_idxs]
            image_proposals.gt_classes = gt_classes

            if image_has_gt:
                # Index of the ground-truth instance matched to each sampled proposal.
                gt_index_per_sample = matched_idxs[sampled_idxs]
                image_proposals.gt_boxes = image_targets.gt_boxes[gt_index_per_sample]

            # Background is labelled with `self.num_classes`; the rest is foreground.
            num_bg = (gt_classes == self.num_classes).sum().item()
            bg_counts.append(num_bg)
            fg_counts.append(gt_classes.numel() - num_bg)
            sampled_proposals.append(image_proposals)

        # Log per-image averages for monitoring.
        storage = get_event_storage()
        storage.put_scalar("roi_head/num_fg_samples", np.mean(fg_counts))
        storage.put_scalar("roi_head/num_bg_samples", np.mean(bg_counts))

        return sampled_proposals
|
|