|
|
| import logging
|
| import math
|
| from typing import List, Tuple, Union
|
| import torch
|
|
|
| from detectron2.layers import batched_nms, cat, move_device_like
|
| from detectron2.structures import Boxes, Instances
|
|
|
| logger = logging.getLogger(__name__)
|
|
|
|
|
def _is_tracing():
    """
    Tell whether execution is currently inside ``torch.jit.trace``.

    Returns:
        bool: always False while TorchScript scripting is active; otherwise
        whatever ``torch.jit.is_tracing()`` reports.
    """
    if torch.jit.is_scripting():
        # The tracing state is deliberately not queried on the scripting path.
        return False
    else:
        return torch.jit.is_tracing()
|
|
|
|
|
def find_top_rpn_proposals(
    proposals: List[torch.Tensor],
    pred_objectness_logits: List[torch.Tensor],
    image_sizes: List[Tuple[int, int]],
    nms_thresh: float,
    pre_nms_topk: int,
    post_nms_topk: int,
    min_box_size: float,
    training: bool,
):
    """
    Turn raw per-level RPN predictions into the final proposals of every image.

    On each feature map the `pre_nms_topk` best-scoring proposals are selected.
    Per image, the selected boxes of all levels are then clipped to the image,
    boxes smaller than `min_box_size` are dropped, NMS is applied with the level
    index as the batching key, and the first `post_nms_topk` survivors are kept.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has
            shape (N, Hi*Wi*A).
        image_sizes (list[tuple]): sizes (h, w) for each image
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_size (float): minimum proposal box side length in pixels (absolute units
            wrt input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            It decides how non-finite predictions are handled: when True they raise
            an error, when False the affected proposals are silently dropped.

    Returns:
        list[Instances]: list of N Instances. The i-th Instances stores at most
        post_nms_topk object proposals for image i in the fields `proposal_boxes`
        and `objectness_logits`, in the order returned by `batched_nms`
        (expected to be descending objectness score).

    Raises:
        FloatingPointError: if `training` is True and any selected box or score
            is Inf/NaN.
    """
    num_images = len(image_sizes)
    # While tracing (and not scripting) helper tensors are created on "cpu";
    # each of them is then passed through `move_device_like` with `proposals[0]`.
    device = (
        proposals[0].device
        if torch.jit.is_scripting()
        else ("cpu" if torch.jit.is_tracing() else proposals[0].device)
    )

    # 1. On every level, pick the best-scoring candidates of every image.
    per_level_scores = []
    per_level_boxes = []
    per_level_ids = []
    image_index = move_device_like(torch.arange(num_images, device=device), proposals[0])
    for level_id, (level_boxes, level_logits) in enumerate(
        zip(proposals, pred_objectness_logits)
    ):
        num_candidates = level_logits.shape[1]
        if isinstance(num_candidates, torch.Tensor):
            # A shape entry can itself be a tensor (presumably under tracing);
            # clamp keeps the count a tensor instead of forcing a Python int.
            num_selected = torch.clamp(num_candidates, max=pre_nms_topk)
        else:
            num_selected = min(num_candidates, pre_nms_topk)

        selected_scores, selected_idx = level_logits.topk(num_selected, dim=1)

        # Pair every image index with that image's selected candidate indices,
        # so row n of the result holds the boxes chosen for image n.
        selected_boxes = level_boxes[image_index[:, None], selected_idx]

        per_level_boxes.append(selected_boxes)
        per_level_scores.append(selected_scores)
        # One level id per selected candidate; used as the NMS batching key below.
        per_level_ids.append(
            move_device_like(
                torch.full((num_selected,), level_id, dtype=torch.int64, device=device),
                proposals[0],
            )
        )

    # 2. Merge the selections of all levels along the candidate dimension.
    merged_scores = cat(per_level_scores, dim=1)
    merged_boxes = cat(per_level_boxes, dim=1)
    merged_level_ids = cat(per_level_ids, dim=0)

    # 3. Per image: sanitize, clip, drop small boxes, run NMS, truncate.
    results: List[Instances] = []
    for image_idx, image_size in enumerate(image_sizes):
        boxes = Boxes(merged_boxes[image_idx])
        scores = merged_scores[image_idx]
        levels = merged_level_ids

        is_finite = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores)
        if not is_finite.all():
            if training:
                raise FloatingPointError(
                    "Predicted boxes or scores contain Inf/NaN. Training has diverged."
                )
            # Outside training, the non-finite entries are just discarded.
            boxes = boxes[is_finite]
            scores = scores[is_finite]
            levels = levels[is_finite]
        boxes.clip(image_size)

        # Drop boxes whose sides fall below `min_box_size`. Without tracing the
        # indexing is skipped when every box passes; under tracing it always runs.
        large_enough = boxes.nonempty(threshold=min_box_size)
        if _is_tracing() or large_enough.sum().item() != len(boxes):
            boxes = boxes[large_enough]
            scores = scores[large_enough]
            levels = levels[large_enough]

        # `levels` is the group key, so boxes from different levels do not
        # suppress each other (assuming the usual batched-NMS contract).
        nms_keep = batched_nms(boxes.tensor, scores, levels, nms_thresh)
        # Plain truncation: relies on `batched_nms` returning indices already
        # ordered by descending score -- TODO confirm against its documentation.
        nms_keep = nms_keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[nms_keep]
        res.objectness_logits = scores[nms_keep]
        results.append(res)
    return results
|
|
|
|
|
def add_ground_truth_to_proposals(
    gt: Union[List[Instances], List[Boxes]], proposals: List[Instances]
) -> List[Instances]:
    """
    Augment the proposals of every image with that image's ground truth.

    Each (gt, proposals) pair is forwarded, in order, to
    `add_ground_truth_to_proposals_single_image`.

    Args:
        gt(Union[List[Instances], List[Boxes]): list of N elements. Element i is
            the ground-truth for image i.
        proposals (list[Instances]): list of N elements. Element i is a Instances
            representing the proposals for image i.

    Returns:
        list[Instances]: list of N Instances, element i being the augmented
        proposals of image i. When `proposals` is empty, that same (empty) list
        object is returned.

    Raises:
        ValueError: if `gt` and `proposals` do not have the same length.
    """
    assert gt is not None

    num_images = len(proposals)
    if num_images != len(gt):
        raise ValueError("proposals and gt should have the same length as the number of images!")
    if num_images == 0:
        # Nothing to augment; hand the caller's list straight back.
        return proposals

    return [
        add_ground_truth_to_proposals_single_image(gt_per_image, proposals_per_image)
        for gt_per_image, proposals_per_image in zip(gt, proposals)
    ]
|
|
|
|
|
def add_ground_truth_to_proposals_single_image(
    gt: Union[Instances, Boxes], proposals: Instances
) -> Instances:
    """
    Append the ground-truth boxes of one image to that image's proposals.

    Args:
        gt (Union[Instances, Boxes]): ground truth of the image. A bare `Boxes`
            is first wrapped into an `Instances` carrying it as `gt_boxes`;
            an `Instances` must already provide `gt_boxes`.
        proposals (Instances): proposals of the same image. Its `image_size` and
            the device of its `objectness_logits` are reused for the new entries.

    Returns:
        Instances: the concatenation of `proposals` and one extra entry per
        ground-truth box. The extra entries use the ground-truth boxes as
        `proposal_boxes` and a large constant as `objectness_logits`.

    Raises:
        AssertionError: if `proposals` has a field that the ground-truth entries
            do not have.
    """
    if isinstance(gt, Boxes):
        # Normalize the input so the rest of the function only sees Instances.
        gt = Instances(proposals.image_size, gt_boxes=gt)

    gt_boxes = gt.gt_boxes
    target_device = proposals.objectness_logits.device

    # Ground truth gets the logit of p = 1 - 1e-10, i.e. log(p / (1 - p)),
    # so that sigmoid(logit) is (almost) 1.
    near_certain = 1.0 - 1e-10
    gt_logit_value = math.log(near_certain / (1 - near_certain))
    gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=target_device)

    # Start from every field `gt` already has, then add the two proposal fields.
    gt_proposal = Instances(proposals.image_size, **gt.get_fields())
    gt_proposal.proposal_boxes = gt_boxes
    gt_proposal.objectness_logits = gt_logits

    # Every field of `proposals` must also exist on the ground-truth side.
    # NOTE(review): the reverse direction is not checked -- presumably because
    # `Instances.cat` only carries over fields of its first item; confirm.
    for field_name in proposals.get_fields():
        assert gt_proposal.has(
            field_name
        ), "The attribute '{}' in `proposals` does not exist in `gt`".format(field_name)

    return Instances.cat([proposals, gt_proposal])
|
|
|