|
|
|
|
| import math
|
| from typing import Dict
|
| import torch
|
| import torch.nn.functional as F
|
|
|
| from detectron2.layers import ShapeSpec, cat
|
| from detectron2.layers.roi_align_rotated import ROIAlignRotated
|
| from detectron2.modeling import poolers
|
| from detectron2.modeling.proposal_generator import rpn
|
| from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference
|
| from detectron2.structures import Boxes, ImageList, Instances, Keypoints, RotatedBoxes
|
|
|
| from .shared import alias, to_device
|
|
|
|
|
| """
|
| This file contains caffe2-compatible implementation of several detectron2 components.
|
| """
|
|
|
|
|
class Caffe2Boxes(Boxes):
    """
    Batched box container in the Caffe2 layout.

    Holds the boxes of a whole minibatch in one 2-D tensor. Each row is a 5d
    vector (batch index + 4 coordinates), or a 6d vector (batch index + 5
    coordinates) for RotatedBoxes. The constructor also accepts rows of
    width 4.
    """

    def __init__(self, tensor):
        """
        Args:
            tensor (torch.Tensor): 2-D tensor whose last dimension is 4, 5 or 6.
        """
        assert isinstance(tensor, torch.Tensor)
        is_matrix = tensor.dim() == 2
        assert is_matrix and tensor.size(-1) in [4, 5, 6], tensor.size()

        # The parent initializer is not invoked; the tensor is stored exactly
        # as given, without copying or reshaping.
        self.tensor = tensor
|
|
|
|
|
|
|
class InstancesList:
    """
    Tensor representation of a list of Instances object for a batch of images.

    When dealing with a batch of images with Caffe2 ops, a list of bboxes
    (instances) are usually represented by single Tensor with size
    (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is
    for providing common functions to convert between these two representations.
    """

    def __init__(self, im_info, indices, extra_fields=None):
        """
        Args:
            im_info: per-image information, iterated one row per image; entries
                0 and 1 of each row are read as the image size when converting
                to ``Instances``.
            indices: per-instance image index; compared against the image
                position to select the rows belonging to each image.
            extra_fields (dict or None): initial name -> value mapping of
                batched fields. A falsy value yields a fresh empty dict.
        """
        self.im_info = im_info
        self.indices = indices
        self.batch_extra_fields = extra_fields or {}

        # Exposed under the same attribute name that Instances uses.
        self.image_size = self.im_info

    def get_fields(self):
        """like `get_fields` in the Instances object,
        but return each field in tensor representations"""
        # Shallow copy: callers may mutate the returned dict freely.
        return dict(self.batch_extra_fields)

    def has(self, name):
        """Return True if a field called `name` has been stored."""
        return name in self.batch_extra_fields

    def set(self, name, value):
        """
        Store `value` under `name`.

        Once at least one field exists, the new value's length must equal
        ``len(self)``; otherwise an AssertionError is raised.
        """
        # shape[0] is read instead of len() for Boxes/Tensor values
        # (presumably to keep tracing/export free of len(tensor) -- confirm).
        if isinstance(value, Boxes):
            data_len = value.tensor.shape[0]
        elif isinstance(value, torch.Tensor):
            data_len = value.shape[0]
        else:
            data_len = len(value)
        if self.batch_extra_fields:
            assert (
                len(self) == data_len
            ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self))
        self.batch_extra_fields[name] = value

    def __getattr__(self, name):
        """
        Resolve unknown attributes as stored fields.

        Raises:
            AttributeError: if `name` is not a stored field, or if the field
                storage itself does not exist yet.
        """
        # __getattr__ only runs when normal lookup fails. If the storage dict
        # is what is missing (instance built without __init__, e.g. by
        # copy/pickle), touching self.batch_extra_fields would re-enter this
        # method forever, so that name is rejected up front.
        if name == "batch_extra_fields" or name not in self.batch_extra_fields:
            raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
        return self.batch_extra_fields[name]

    def __len__(self):
        """Number of instances, taken from the length of `indices`."""
        return len(self.indices)

    def flatten(self):
        """Return the field values as a list, unwrapping Boxes/Keypoints to tensors."""
        return [
            v.tensor if isinstance(v, (Boxes, Keypoints)) else v
            for v in self.batch_extra_fields.values()
        ]

    @staticmethod
    def to_d2_instances_list(instances_list):
        """
        Convert InstancesList to List[Instances]. The input `instances_list` can
        also be a List[Instances], in this case this method is a non-op.

        Raises:
            ValueError: if a ``(target_type, tensor)`` field names a type other
                than Boxes, Keypoints or torch.Tensor.
        """
        if not isinstance(instances_list, InstancesList):
            assert all(isinstance(x, Instances) for x in instances_list)
            return instances_list

        ret = []
        for i, info in enumerate(instances_list.im_info):
            instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())]))

            # Boolean mask selecting the rows that belong to image i.
            ids = instances_list.indices == i
            for k, v in instances_list.batch_extra_fields.items():
                if isinstance(v, torch.Tensor):
                    instances.set(k, v[ids])
                    continue
                elif isinstance(v, Boxes):
                    # Keep only the last 4 columns of each selected row.
                    instances.set(k, v[ids, -4:])
                    continue

                # Any other field is a (target_type, tensor) pair.
                target_type, tensor_source = v
                assert isinstance(tensor_source, torch.Tensor)
                assert tensor_source.shape[0] == instances_list.indices.shape[0]
                tensor_source = tensor_source[ids]

                if issubclass(target_type, Boxes):
                    instances.set(k, Boxes(tensor_source[:, -4:]))
                elif issubclass(target_type, Keypoints):
                    instances.set(k, Keypoints(tensor_source))
                elif issubclass(target_type, torch.Tensor):
                    instances.set(k, tensor_source)
                else:
                    raise ValueError("Can't handle targe type: {}".format(target_type))

            ret.append(instances)
        return ret
|
|
|
|
|
class Caffe2Compatible:
    """
    Mixin marking a model as traceable and deployable with caffe2.
    """

    # Accessor pair backing the `tensor_mode` property; the value lives in
    # `_tensor_mode`, which must be assigned before it is first read.
    def _get_tensor_mode(self):
        return self._tensor_mode

    def _set_tensor_mode(self, v):
        self._tensor_mode = v

    # If true, the model expects C2-style tensor only inputs/outputs format.
    tensor_mode = property(fget=_get_tensor_mode, fset=_set_tensor_mode)
|
|
|
|
|
class Caffe2RPN(Caffe2Compatible, rpn.RPN):
    """RPN variant that generates proposals through `torch.ops._caffe2` operators."""

    @classmethod
    def from_config(cls, cfg, input_shape: Dict[str, ShapeSpec]):
        """Build the arguments via the next `from_config` after Caffe2Compatible in the MRO.

        Asserts that every RPN box-regression weight equals 1.0 (4 or 5 weights).
        """
        ret = super(Caffe2Compatible, cls).from_config(cfg, input_shape)
        reg_weights = tuple(cfg.MODEL.RPN.BBOX_REG_WEIGHTS)
        assert reg_weights == (1.0, 1.0, 1.0, 1.0) or reg_weights == (1.0, 1.0, 1.0, 1.0, 1.0)
        return ret

    def _generate_proposals(
        self, images, objectness_logits_pred, anchor_deltas_pred, gt_instances=None
    ):
        """
        Run GenerateProposals per feature level and merge the levels.

        Args:
            images (ImageList): input batch.
            objectness_logits_pred: per-level objectness predictions.
            anchor_deltas_pred: per-level anchor delta predictions.
            gt_instances: accepted for interface compatibility; not read here.

        Returns:
            tuple: (proposals, {}) where proposals comes from `c2_postprocess`.
        """
        assert isinstance(images, ImageList)
        if self.tensor_mode:
            # In tensor mode image_sizes is expected to already be a tensor.
            im_info = images.image_sizes
        else:
            size_rows = [[im_sz[0], im_sz[1], 1.0] for im_sz in images.image_sizes]
            im_info = torch.tensor(size_rows).to(images.tensor.device)
        assert isinstance(im_info, torch.Tensor)

        level_anchors = list(self.anchor_generator.cell_anchors.buffers())
        level_strides = self.anchor_generator.strides

        rpn_rois_list = []
        rpn_roi_probs_list = []
        for level_scores, level_deltas, cell_anchors_tensor, feat_stride in zip(
            objectness_logits_pred, anchor_deltas_pred, level_anchors, level_strides
        ):
            level_rois, level_probs = torch.ops._caffe2.GenerateProposals(
                level_scores.detach(),
                level_deltas.detach(),
                im_info,
                cell_anchors_tensor,
                spatial_scale=1.0 / feat_stride,
                pre_nms_topN=self.pre_nms_topk[self.training],
                post_nms_topN=self.post_nms_topk[self.training],
                nms_thresh=self.nms_thresh,
                min_size=self.min_box_size,
                angle_bound_on=True,
                angle_bound_lo=-180,
                angle_bound_hi=180,
                clip_angle_thresh=1.0,
                legacy_plus_one=False,
            )
            rpn_rois_list.append(level_rois)
            rpn_roi_probs_list.append(level_probs)

        if len(objectness_logits_pred) == 1:
            # Single level: nothing to merge.
            rpn_rois = rpn_rois_list[0]
            rpn_roi_probs = rpn_roi_probs_list[0]
        else:
            num_levels = len(rpn_rois_list)
            assert num_levels == len(rpn_roi_probs_list)
            rpn_post_nms_topN = self.post_nms_topk[self.training]

            # The collect op is fed cpu copies; its result is moved back below.
            device = rpn_rois_list[0].device
            input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)]

            # The levels derived from the strides are only used for this sanity
            # check; the op call below receives fixed level numbers.
            feature_strides = list(self.anchor_generator.strides)
            rpn_min_level = int(math.log2(feature_strides[0]))
            rpn_max_level = int(math.log2(feature_strides[-1]))
            assert (
                rpn_max_level - rpn_min_level + 1
            ) == num_levels, "CollectRpnProposals requires continuous levels"

            rpn_rois = torch.ops._caffe2.CollectRpnProposals(
                input_list,
                rpn_max_level=2 + num_levels - 1,
                rpn_min_level=2,
                rpn_post_nms_topN=rpn_post_nms_topN,
            )
            rpn_rois = to_device(rpn_rois, device)
            # No per-proposal scores are kept in the multi-level case.
            rpn_roi_probs = []

        proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode)
        return proposals, {}

    def forward(self, images, features, gt_instances=None):
        """Inference-only forward: run the RPN head, then generate proposals."""
        assert not self.training
        selected = [features[f] for f in self.in_features]
        objectness_logits_pred, anchor_deltas_pred = self.rpn_head(selected)
        return self._generate_proposals(
            images, objectness_logits_pred, anchor_deltas_pred, gt_instances
        )

    @staticmethod
    def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode):
        """
        Wrap the RoIs into an InstancesList.

        Returns a one-element list holding the InstancesList when
        `tensor_mode` is true, otherwise the converted list of Instances.
        """
        fields = {
            "proposal_boxes": Caffe2Boxes(rpn_rois),
            "objectness_logits": (torch.Tensor, rpn_roi_probs),
        }
        # Column 0 of each RoI row is used as the per-instance image index.
        batched = InstancesList(im_info=im_info, indices=rpn_rois[:, 0], extra_fields=fields)
        if tensor_mode:
            return [batched]
        return InstancesList.to_d2_instances_list(batched)
|
|
|
|
|
class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler):
    """ROIPooler variant that pools through `torch.ops._caffe2` operators (inference only)."""

    @staticmethod
    def c2_preprocess(box_lists):
        """
        Convert `box_lists` into the single RoI tensor the pooling ops consume.

        If every element is a Caffe2Boxes there must be exactly one, and its
        tensor is returned as is. Otherwise the boxes go through
        `poolers.convert_boxes_to_pooler_format`.
        """
        assert all(isinstance(x, Boxes) for x in box_lists)
        if all(isinstance(x, Caffe2Boxes) for x in box_lists):
            assert len(box_lists) == 1
            pooler_fmt_boxes = box_lists[0].tensor
        else:
            pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists)
        return pooler_fmt_boxes

    def _c2_roi_align(self, features, rois, pooler):
        """Pool `rois` from one feature map with the caffe2 op matching `pooler`."""
        if isinstance(pooler, ROIAlignRotated):
            c2_roi_align = torch.ops._caffe2.RoIAlignRotated
            aligned = True
        else:
            c2_roi_align = torch.ops._caffe2.RoIAlign
            aligned = bool(pooler.aligned)

        # Quantized feature maps are dequantized before being handed to the op.
        if features.is_quantized:
            features = features.dequantize()

        return c2_roi_align(
            features,
            rois,
            order="NCHW",
            spatial_scale=float(pooler.spatial_scale),
            pooled_h=int(self.output_size[0]),
            pooled_w=int(self.output_size[1]),
            sampling_ratio=int(pooler.sampling_ratio),
            aligned=aligned,
        )

    def forward(self, x, box_lists):
        """
        Pool features for the given boxes.

        Args:
            x: sequence of feature maps, one per level pooler.
            box_lists: list of Boxes (or a single Caffe2Boxes).

        Returns:
            The pooled features. With several levels they are restored to the
            input RoI order via BatchPermutation.
        """
        assert not self.training

        pooler_fmt_boxes = self.c2_preprocess(box_lists)

        if len(self.level_poolers) == 1:
            return self._c2_roi_align(x[0], pooler_fmt_boxes, self.level_poolers[0])

        device = pooler_fmt_boxes.device
        assert (
            self.max_level - self.min_level + 1 == 4
        ), "Currently DistributeFpnProposals only support 4 levels"
        # The distribute op is fed a cpu copy; outputs are moved back afterwards.
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            to_device(pooler_fmt_boxes, "cpu"),
            roi_canonical_scale=self.canonical_box_size,
            roi_canonical_level=self.canonical_level,
            roi_max_level=self.max_level,
            roi_min_level=self.min_level,
            legacy_plus_one=False,
        )
        fpn_outputs = [to_device(out, device) for out in fpn_outputs]

        # All outputs but the last are per-level RoIs; the last is the index
        # used to undo the per-level shuffling.
        rois_fpn_list = fpn_outputs[:-1]
        rois_idx_restore_int32 = fpn_outputs[-1]

        roi_feat_fpn_list = [
            self._c2_roi_align(x_level, roi_fpn, pooler)
            for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers)
        ]

        roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0)
        assert roi_feat_shuffled.numel() > 0 and rois_idx_restore_int32.numel() > 0, (
            "Caffe2 export requires tracing with a model checkpoint + input that can produce valid"
            " detections. But no detections were obtained with the given checkpoint and input!"
        )
        roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32)
        return roi_feat
|
|
|
|
|
def caffe2_fast_rcnn_outputs_inference(tensor_mode, box_predictor, predictions, proposals):
    """equivalent to FastRCNNOutputLayers.inference

    Args:
        tensor_mode (bool): if true, results stay in the batched InstancesList
            form; otherwise they are converted to a list of Instances.
        box_predictor: object providing `num_classes`, `test_score_thresh`,
            `test_nms_thresh`, `test_topk_per_image` and
            `box2box_transform.weights`.
        predictions: pair ``(class_logits, box_regression)``.
        proposals: list whose items expose `proposal_boxes` and `image_size`.

    Returns:
        tuple: ``(results, kept_indices)``, each a list.

    Raises:
        NotImplementedError: if the proposal boxes are not a Boxes type.
    """
    num_classes = box_predictor.num_classes
    score_thresh = box_predictor.test_score_thresh
    nms_thresh = box_predictor.test_nms_thresh
    topk_per_image = box_predictor.test_topk_per_image
    is_rotated = len(box_predictor.box2box_transform.weights) == 5

    if is_rotated:
        box_dim = 5
        assert box_predictor.box2box_transform.weights[4] == 1, (
            "The weights for Rotated BBoxTransform in C2 have only 4 dimensions,"
            + " thus enforcing the angle weight to be 1 for now"
        )
        box2box_transform_weights = box_predictor.box2box_transform.weights[:4]
    else:
        box_dim = 4
        box2box_transform_weights = box_predictor.box2box_transform.weights

    class_logits, box_regression = predictions
    if num_classes + 1 == class_logits.shape[1]:
        class_prob = F.softmax(class_logits, -1)
    else:
        assert num_classes == class_logits.shape[1]
        class_prob = F.sigmoid(class_logits)
        # The logits have no extra column in this branch, so a zero column is
        # appended to reach num_classes + 1 columns like the softmax branch.
        # It is created with class_prob's own dtype and device; a default
        # (cpu, float32) tensor would make torch.cat fail on other devices.
        zero_column = torch.zeros(
            class_prob.shape[0], 1, dtype=class_prob.dtype, device=class_prob.device
        )
        class_prob = torch.cat((class_prob, zero_column), dim=1)

    assert box_regression.shape[1] % box_dim == 0
    cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1

    # One extra leading column on the proposal boxes marks the batched layout.
    input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1

    proposal_boxes = proposals[0].proposal_boxes
    if isinstance(proposal_boxes, Caffe2Boxes):
        rois = Caffe2Boxes.cat([p.proposal_boxes for p in proposals])
    elif isinstance(proposal_boxes, RotatedBoxes):
        rois = RotatedBoxes.cat([p.proposal_boxes for p in proposals])
    elif isinstance(proposal_boxes, Boxes):
        rois = Boxes.cat([p.proposal_boxes for p in proposals])
    else:
        raise NotImplementedError(
            'Expected proposals[0].proposal_boxes to be type "Boxes", '
            f"instead got {type(proposal_boxes)}"
        )

    device, dtype = rois.tensor.device, rois.tensor.dtype
    if input_tensor_mode:
        im_info = proposals[0].image_size
        rois = rois.tensor
    else:
        im_info = torch.tensor([[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]])
        # Prepend a column holding the image index of every box.
        batch_ids = cat(
            [
                torch.full((b, 1), i, dtype=dtype, device=device)
                for i, b in enumerate(len(p) for p in proposals)
            ],
            dim=0,
        )
        rois = torch.cat([batch_ids, rois.tensor], dim=1)

    # Inputs of the two ops below are moved to cpu and the outputs moved back
    # (presumably because these ops only run on cpu -- confirm).
    roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform(
        to_device(rois, "cpu"),
        to_device(box_regression, "cpu"),
        to_device(im_info, "cpu"),
        weights=box2box_transform_weights,
        apply_scale=True,
        rotated=is_rotated,
        angle_bound_on=True,
        angle_bound_lo=-180,
        angle_bound_hi=180,
        clip_angle_thresh=1.0,
        legacy_plus_one=False,
    )
    roi_pred_bbox = to_device(roi_pred_bbox, device)
    roi_batch_splits = to_device(roi_batch_splits, device)

    nms_outputs = torch.ops._caffe2.BoxWithNMSLimit(
        to_device(class_prob, "cpu"),
        to_device(roi_pred_bbox, "cpu"),
        to_device(roi_batch_splits, "cpu"),
        score_thresh=float(score_thresh),
        nms=float(nms_thresh),
        detections_per_im=int(topk_per_image),
        soft_nms_enabled=False,
        soft_nms_method="linear",
        soft_nms_sigma=0.5,
        soft_nms_min_score_thres=0.001,
        rotated=is_rotated,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        input_boxes_include_bg_cls=False,
        output_classes_include_bg_cls=False,
        legacy_plus_one=False,
    )
    roi_score_nms = to_device(nms_outputs[0], device)
    roi_bbox_nms = to_device(nms_outputs[1], device)
    roi_class_nms = to_device(nms_outputs[2], device)
    roi_batch_splits_nms = to_device(nms_outputs[3], device)
    roi_keeps_nms = to_device(nms_outputs[4], device)
    roi_keeps_size_nms = to_device(nms_outputs[5], device)
    if not tensor_mode:
        roi_class_nms = roi_class_nms.to(torch.int64)

    # Rebuild a per-detection image index from the per-image detection counts.
    roi_batch_ids = cat(
        [
            torch.full((b, 1), i, dtype=dtype, device=device)
            for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms)
        ],
        dim=0,
    )

    roi_class_nms = alias(roi_class_nms, "class_nms")
    roi_score_nms = alias(roi_score_nms, "score_nms")
    roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms")
    roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms")
    roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms")
    roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms")

    results = InstancesList(
        im_info=im_info,
        indices=roi_batch_ids[:, 0],
        extra_fields={
            "pred_boxes": Caffe2Boxes(roi_bbox_nms),
            "scores": roi_score_nms,
            "pred_classes": roi_class_nms,
        },
    )

    if not tensor_mode:
        results = InstancesList.to_d2_instances_list(results)
        batch_splits = roi_batch_splits_nms.int().tolist()
        kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits))
    else:
        results = [results]
        kept_indices = [roi_keeps_nms]

    return results, kept_indices
|
|
|
|
|
class Caffe2FastRCNNOutputsInference:
    """Callable that binds `tensor_mode` for `caffe2_fast_rcnn_outputs_inference`."""

    def __init__(self, tensor_mode):
        # Forwarded unchanged as the first argument on every call.
        self.tensor_mode = tensor_mode

    def __call__(self, box_predictor, predictions, proposals):
        """Run the caffe2 box inference with the stored `tensor_mode`."""
        mode = self.tensor_mode
        return caffe2_fast_rcnn_outputs_inference(mode, box_predictor, predictions, proposals)
|
|
|
|
|
def caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances):
    """equivalent to mask_head.mask_rcnn_inference

    When every element of `pred_instances` is an InstancesList (exactly one is
    required), the sigmoid of the logits is stored on it as "pred_masks".
    Otherwise the call is delegated to `mask_rcnn_inference`. Returns None.
    """
    batched_input = all(isinstance(x, InstancesList) for x in pred_instances)
    if not batched_input:
        mask_rcnn_inference(pred_mask_logits, pred_instances)
        return

    assert len(pred_instances) == 1
    mask_probs_pred = alias(pred_mask_logits.sigmoid(), "mask_fcn_probs")
    pred_instances[0].set("pred_masks", mask_probs_pred)
|
|
|
|
|
class Caffe2MaskRCNNInference:
    """Callable wrapper around `caffe2_mask_rcnn_inference`."""

    def __call__(self, pred_mask_logits, pred_instances):
        """Forward both arguments unchanged and return the helper's result."""
        result = caffe2_mask_rcnn_inference(pred_mask_logits, pred_instances)
        return result
|
|
|
|
|
def caffe2_keypoint_rcnn_inference(use_heatmap_max_keypoint, pred_keypoint_logits, pred_instances):
    """
    Attach keypoint predictions to a batched InstancesList.

    Args:
        use_heatmap_max_keypoint (bool): if true, the logits are passed through
            the HeatmapMaxKeypoint op before being stored.
        pred_keypoint_logits: keypoint logits produced by the keypoint head.
        pred_instances: list of prediction containers. Nothing is stored
            unless every element is an InstancesList (exactly one is required).

    Returns:
        `pred_keypoint_logits`, unchanged.
    """
    output = alias(pred_keypoint_logits, "kps_score")
    if not all(isinstance(x, InstancesList) for x in pred_instances):
        return pred_keypoint_logits

    assert len(pred_instances) == 1
    target = pred_instances[0]
    if use_heatmap_max_keypoint:
        device = output.device
        # NOTE(review): only the heatmap is moved to cpu here; the boxes
        # tensor is passed as is -- confirm both are on the same device.
        output = torch.ops._caffe2.HeatmapMaxKeypoint(
            to_device(output, "cpu"),
            target.pred_boxes.tensor,
            should_output_softmax=True,
        )
        output = alias(to_device(output, device), "keypoints_out")
    target.set("pred_keypoints", output)
    return pred_keypoint_logits
|
|
|
|
|
class Caffe2KeypointRCNNInference:
    """Callable that binds `use_heatmap_max_keypoint` for `caffe2_keypoint_rcnn_inference`."""

    def __init__(self, use_heatmap_max_keypoint):
        # Forwarded unchanged as the first argument on every call.
        self.use_heatmap_max_keypoint = use_heatmap_max_keypoint

    def __call__(self, pred_keypoint_logits, pred_instances):
        """Run the caffe2 keypoint inference with the stored flag."""
        flag = self.use_heatmap_max_keypoint
        return caffe2_keypoint_rcnn_inference(flag, pred_keypoint_logits, pred_instances)
|
|
|