| | |
| | import torch |
| | from torch.nn import functional as F |
| |
|
| | from detectron2.structures import Instances, ROIMasks |
| |
|
| |
|
| | |
def detector_postprocess(
    results: Instances, output_height: int, output_width: int, mask_threshold: float = 0.5
):
    """
    Rescale raw detector outputs to a requested output resolution.

    Detectors usually run on a resized copy of the image, so their boxes,
    masks and keypoints live in that resized frame. This function maps them
    into a frame of size (output_height, output_width).

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the detector sees.
            This object might be modified in-place.
        output_height, output_width: the desired output resolution.
        mask_threshold (float): threshold forwarded to `ROIMasks.to_bitmasks`
            when `pred_masks` is converted into masks at the output resolution.

    Returns:
        Instances: the predictions expressed in the output resolution, restricted
            to the instances selected by the `nonempty()` mask of the clipped boxes.
    """
    if isinstance(output_width, torch.Tensor):
        # The sizes may arrive as tensors (presumably during tracing). Float
        # copies are used for the scale factors so the division below is a true
        # division; the original tensors are kept for the new image size.
        width_for_scale = output_width.float()
        height_for_scale = output_height.float()
        target_size = torch.stack([output_height, output_width])
    else:
        target_size = (output_height, output_width)
        width_for_scale = output_width
        height_for_scale = output_height

    # Scale factors from the detector's input frame to the output frame.
    scale_x = width_for_scale / results.image_size[1]
    scale_y = height_for_scale / results.image_size[0]

    # Same fields, new image size.
    resized = Instances(target_size, **results.get_fields())

    # Prefer final predictions, fall back to proposals.
    boxes = None
    for box_field in ("pred_boxes", "proposal_boxes"):
        if resized.has(box_field):
            boxes = getattr(resized, box_field)
            break
    assert boxes is not None, "Predictions must contain boxes!"

    # Boxes are rescaled and clipped in-place.
    boxes.scale(scale_x, scale_y)
    boxes.clip(resized.image_size)

    # Keep only the instances flagged by the boxes' nonempty() mask.
    kept = resized[boxes.nonempty()]

    if kept.has("pred_masks"):
        masks = kept.pred_masks
        if not isinstance(masks, ROIMasks):
            # Raw tensor masks are indexed as 4-D; channel 0 is wrapped as ROIMasks.
            masks = ROIMasks(masks[:, 0, :, :])
        bitmasks = masks.to_bitmasks(kept.pred_boxes, output_height, output_width, mask_threshold)
        kept.pred_masks = bitmasks.tensor

    if kept.has("pred_keypoints"):
        # Rescale the x and y coordinates in-place; other channels are untouched.
        keypoints = kept.pred_keypoints
        keypoints[:, :, 0] *= scale_x
        keypoints[:, :, 1] *= scale_y

    return kept
| |
|
| |
|
def sem_seg_postprocess(result, img_size, output_height, output_width):
    """
    Return semantic segmentation predictions in the original resolution.

    The input images are often resized when entering a semantic segmentor. Moreover, in some
    cases, they are also padded inside the segmentor to be divisible by the maximum network
    stride. As a result, we often need the predictions of the segmentor in a different
    resolution from its inputs.

    Args:
        result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W),
            where C is the number of classes, and H, W are the height and width of the prediction.
        img_size (tuple): image size that the segmentor is taking as input; only
            `img_size[0]` (height) and `img_size[1]` (width) are read.
        output_height, output_width: the desired output resolution.

    Returns:
        semantic segmentation prediction (Tensor): A tensor of the shape
            (C, output_height, output_width) that contains per-pixel soft predictions.
    """
    # Crop the prediction to the first img_size[0] rows and img_size[1] columns.
    valid_region = result[:, : img_size[0], : img_size[1]]

    # F.interpolate expects a batch dimension, so view the crop as a batch of one.
    batched = valid_region.expand(1, -1, -1, -1)
    target_size = (output_height, output_width)
    resized = F.interpolate(batched, size=target_size, mode="bilinear", align_corners=False)

    # Strip the batch dimension again.
    return resized[0]
| |
|