Spaces:
Sleeping
Sleeping
| # Copyright 2023 The TensorFlow Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| """Metrics for instance detection & segmentation.""" | |
| from typing import Any, Dict, Optional, Tuple, Union | |
| import numpy as np | |
| import tensorflow as tf, tf_keras | |
| from official.vision.ops import box_ops | |
| from official.vision.ops import mask_ops | |
class AveragePrecision(tf_keras.layers.Layer):
  """Base layer for computing average precision from a P-R curve."""

  def __init__(self, *args, **kwargs):
    """Initializes the layer and pins its computation to `float32`.

    The AP implementations require `float32`, so the dtype is enforced here
    regardless of the surrounding dtype policy.
    """
    super().__init__(*args, dtype=tf.float32, **kwargs)

  def call(self, precisions, recalls):
    """Computes average precision; must be implemented by subclasses."""
    raise NotImplementedError
class COCOAveragePrecision(AveragePrecision):
  """Average precision in COCO style.

  In COCO, AP is defined as the mean of interpolated precisions at a set of
  101 equally spaced recall points [0, 0.01, ..., 1]. For each recall point
  r, the precision is interpolated to the maximum precision with
  corresponding recall r' >= r.

  The VOC challenges before 2010 used a similar method, but with only 11
  recall points [0, 0.1, ..., 1].
  """

  def __init__(
      self, num_recall_eval_points: int = 101, recalls_desc: bool = False
  ):
    """Initializes COCOAveragePrecision.

    Args:
      num_recall_eval_points: the number of equally spaced recall points used
        for interpolating the precisions.
      recalls_desc: If true, the recalls are in descending order.
    """
    super().__init__()
    self._num_recall_eval_points = num_recall_eval_points
    self._recalls_desc = recalls_desc

  def get_config(self) -> Dict[str, Any]:
    return {
        'num_recall_eval_points': self._num_recall_eval_points,
        'recalls_desc': self._recalls_desc,
    }

  def call(self, precisions: tf.Tensor, recalls: tf.Tensor) -> tf.Tensor:
    """Computes average precision.

    Args:
      precisions: a tensor in shape (dim_0, ..., num_confidences) which
        stores a list of precision values at different confidence thresholds
        with arbitrary numbers of leading dimensions.
      recalls: a tensor in shape (dim_0, ..., num_confidences) which stores a
        list of recall values at different confidence thresholds with
        arbitrary numbers of leading dimensions.

    Returns:
      A tensor in shape (dim_0, ...), which stores the area under P-R curve.
    """
    # `convert_to_tensor` is a no-op on inputs that are already tensors.
    p = tf.convert_to_tensor(precisions)
    r = tf.convert_to_tensor(recalls)
    if self._recalls_desc:
      # Flip so that recalls are in ascending order.
      p = tf.reverse(p, axis=[-1])
      r = tf.reverse(r, axis=[-1])

    recall_points = tf.linspace(0.0, 1.0, self._num_recall_eval_points)
    # For each recall eval point, mask in the precisions whose recall is at
    # least that point: (dim_0, ..., num_recall_eval_points, num_confidences).
    reachable = tf.cast(
        r[..., tf.newaxis, :] >= recall_points[:, tf.newaxis], dtype=p.dtype
    )
    # Interpolate each recall eval point to the maximum masked-in precision.
    # (dim_0, ..., num_recall_eval_points)
    interpolated_p = tf.reduce_max(p[..., tf.newaxis, :] * reachable, axis=-1)
    # Mean over the recall eval points: (dim_0, ...)
    return tf.reduce_mean(interpolated_p, axis=-1)
class VOC2010AveragePrecision(AveragePrecision):
  """Average precision in VOC 2010 style.

  Since VOC 2010, one first computes an approximation of the measured P-R
  curve with precision monotonically decreasing, by setting the precision
  for recall r to the maximum precision obtained for any recall r' >= r.
  The AP is then the area under this curve, obtained by numerical
  integration.
  """

  def __init__(self, recalls_desc: bool = False):
    """Initializes VOC2010AveragePrecision.

    Args:
      recalls_desc: If true, the recalls are in descending order.
    """
    super().__init__()
    self._recalls_desc = recalls_desc

  def get_config(self) -> Dict[str, Any]:
    return {
        'recalls_desc': self._recalls_desc,
    }

  def call(self, precisions: tf.Tensor, recalls: tf.Tensor) -> tf.Tensor:
    """Computes average precision.

    Args:
      precisions: a tensor in shape (dim_0, ..., num_confidences) which
        stores a list of precision values at different confidence thresholds
        with arbitrary numbers of leading dimensions.
      recalls: a tensor in shape (dim_0, ..., num_confidences) which stores a
        list of recall values at different confidence thresholds with
        arbitrary numbers of leading dimensions.

    Returns:
      A tensor in shape (dim_0, ...), which stores the area under P-R curve.
    """
    # `convert_to_tensor` is a no-op on inputs that are already tensors.
    p = tf.convert_to_tensor(precisions)
    r = tf.convert_to_tensor(recalls)
    if self._recalls_desc:
      # Flip so that recalls are in ascending order.
      p = tf.reverse(p, axis=[-1])
      r = tf.reverse(r, axis=[-1])

    axes = list(range(len(p.get_shape())))
    # tf.scan only runs along the first dimension, so temporarily move the
    # confidence dimension to the front: (num_confidences, ...).
    p = tf.transpose(p, np.roll(axes, 1))
    # Reverse cumulative maximum, e.g. [5,6,3,4,2,1] -> [6,6,4,4,2,1], which
    # makes the precision curve monotonically decreasing.
    p = tf.scan(
        tf.maximum, elems=p, initializer=tf.reduce_min(p, axis=0), reverse=True
    )
    # Restore the original layout: (..., num_confidences).
    p = tf.transpose(p, np.roll(axes, -1))

    # Pad a leading zero recall and integrate precision over the recall
    # increments (rectangle rule).
    padded_r = tf.concat([tf.zeros_like(r[..., :1]), r], axis=-1)
    delta_r = padded_r[..., 1:] - padded_r[..., :-1]
    return tf.reduce_sum(p * delta_r, axis=-1)
class MatchingAlgorithm(tf_keras.layers.Layer):
  """Base layer for matching detections to ground truths."""

  def __init__(self, *args, **kwargs):
    """Initializes the layer and pins its computation to `float32`.

    The matching implementations require `float32`, so the dtype is enforced
    here regardless of the surrounding dtype policy.
    """
    super().__init__(*args, dtype=tf.float32, **kwargs)

  def call(
      self,
      detection_to_gt_ious: tf.Tensor,
      detection_classes: tf.Tensor,
      detection_scores: tf.Tensor,
      gt_classes: tf.Tensor,
  ):
    """Matches detections to ground truths; implemented by subclasses."""
    raise NotImplementedError
class COCOMatchingAlgorithm(MatchingAlgorithm):
  """The detection matching algorithm used in COCO."""

  def __init__(self, iou_thresholds: Tuple[float, ...]):
    """Initialization for COCOMatchingAlgorithm.

    Args:
      iou_thresholds: a list of IoU thresholds.
    """
    super().__init__()
    self._iou_thresholds = iou_thresholds

  def get_config(self) -> Dict[str, Any]:
    return {
        'iou_thresholds': self._iou_thresholds,
    }

  def call(
      self,
      detection_to_gt_ious: tf.Tensor,
      detection_classes: tf.Tensor,
      detection_scores: tf.Tensor,
      gt_classes: tf.Tensor,
  ) -> Tuple[tf.Tensor, tf.Tensor]:
    """Matches detections to ground truths.

    This is the matching algorithm used in COCO. First, sort all the
    detections based on the scores from high to low. Then for each detection,
    iterates through all ground truth. The unmatched ground truth with the
    highest IoU greater than the threshold is matched to the detection.

    Args:
      detection_to_gt_ious: a tensor in shape of (batch_size, num_detections,
        num_gts) which stores the IoUs for each pair of detection and ground
        truth.
      detection_classes: a tensor in shape of (batch_size, num_detections)
        which stores the classes of the detections.
      detection_scores: a tensor in shape of (batch_size, num_detections)
        which stores the scores of the detections.
      gt_classes: a tensor in shape of (batch_size, num_gts) which stores the
        classes of the ground truth boxes.

    Returns:
      Two bool tensors in shape of (batch_size, num_detections,
      num_iou_thresholds) and (batch_size, num_gts, num_iou_thresholds) which
      indicates whether the detections and ground truths are true positives
      at different IoU thresholds.
    """
    batch_size = tf.shape(detection_classes)[0]
    # num_detections and num_gts must be statically known, since they are
    # used to build fixed-shape loop variables and one-hot depths below.
    num_detections = detection_classes.get_shape()[1]
    num_gts = gt_classes.get_shape()[1]
    num_iou_thresholds = len(self._iou_thresholds)

    # Sort detections by score so higher-confidence detections get first
    # pick of the ground truths.
    # (batch_size, num_detections)
    sorted_detection_indices = tf.argsort(
        detection_scores, axis=1, direction='DESCENDING'
    )
    # (batch_size, num_detections)
    sorted_detection_classes = tf.gather(
        detection_classes, sorted_detection_indices, batch_dims=1
    )
    # (batch_size, num_detections, num_gts)
    sorted_detection_to_gt_ious = tf.gather(
        detection_to_gt_ious, sorted_detection_indices, batch_dims=1
    )

    init_loop_vars = (
        0,  # i: the loop counter
        tf.zeros(
            [batch_size, num_detections, num_iou_thresholds], dtype=tf.bool
        ),  # detection_is_tp
        tf.zeros(
            [batch_size, num_gts, num_iou_thresholds], dtype=tf.bool
        ),  # gt_is_tp
    )

    def _match_detection_to_gt_loop_body(
        i: int, detection_is_tp: tf.Tensor, gt_is_tp: tf.Tensor
    ) -> Tuple[int, tf.Tensor, tf.Tensor]:
      """Iterates the sorted detections and matches to the ground truths."""
      # IoUs of the i-th highest-scoring detection against all ground truths.
      # (batch_size, num_gts)
      gt_ious = sorted_detection_to_gt_ious[:, i, :]
      # A ground truth is a candidate match for this detection when it is
      # still unmatched, overlaps enough, and has the same (foreground) class.
      # (batch_size, num_gts, num_iou_thresholds)
      gt_matches_detection = (
          # Ground truth is not matched yet.
          ~gt_is_tp
          # IoU is greater than the threshold.
          & (gt_ious[:, :, tf.newaxis] > self._iou_thresholds)
          # Classes are matched.
          & (
              (sorted_detection_classes[:, i][:, tf.newaxis] == gt_classes)
              & (gt_classes > 0)
          )[:, :, tf.newaxis]
      )
      # Finds the matched ground truth with max IoU.
      # If there is no matched ground truth, the argmax op will return index 0
      # in this step. It's fine because it will be masked out in the next
      # step.
      # (batch_size, num_iou_thresholds)
      matched_gt_with_max_iou = tf.argmax(
          tf.cast(gt_matches_detection, gt_ious.dtype)
          * gt_ious[:, :, tf.newaxis],
          axis=1,
          output_type=tf.int32,
      )
      # Keep only the single best-IoU candidate per threshold (also masks out
      # the spurious index-0 result from the argmax above when there is no
      # candidate at all).
      # (batch_size, num_gts, num_iou_thresholds)
      gt_matches_detection &= tf.one_hot(
          matched_gt_with_max_iou,
          depth=num_gts,
          on_value=True,
          off_value=False,
          axis=1,
      )

      # Updates detection_is_tp
      # Map index back to the unsorted detections.
      # (batch_size, num_detections, num_iou_thresholds)
      detection_is_tp |= (
          tf.reduce_any(gt_matches_detection, axis=1, keepdims=True)
          & tf.one_hot(
              sorted_detection_indices[:, i],
              depth=num_detections,
              on_value=True,
              off_value=False,
              axis=-1,
          )[:, :, tf.newaxis]
      )
      # Re-assert the static shape so tf.while_loop sees invariant shapes.
      detection_is_tp.set_shape([None, num_detections, num_iou_thresholds])

      # Updates gt_is_tp
      # (batch_size, num_gts, num_iou_thresholds)
      gt_is_tp |= gt_matches_detection
      gt_is_tp.set_shape([None, num_gts, num_iou_thresholds])

      # Returns the updated loop vars.
      return (i + 1, detection_is_tp, gt_is_tp)

    _, detection_is_tp_result, gt_is_tp_result = tf.while_loop(
        cond=lambda i, *_: i < num_detections,
        body=_match_detection_to_gt_loop_body,
        loop_vars=init_loop_vars,
        parallel_iterations=32,
        maximum_iterations=num_detections,
    )
    return detection_is_tp_result, gt_is_tp_result
def _shift_and_rescale_boxes(
    boxes: tf.Tensor,
    output_boundary: Tuple[int, int],
) -> tf.Tensor:
  """Shift and rescale the boxes to fit in the output boundary.

  The output boundary of the boxes can be smaller than the original image
  size for accelerating the downstream calculations (dynamic mask resizing,
  mask IoU, etc.).

  For each image of the batch:
  (1) find the upper boundary (min_ymin) and the left boundary (min_xmin) of
      all the boxes.
  (2) shift all the boxes up min_ymin pixels and left min_xmin pixels.
  (3) find the new lower boundary (max_ymax) and the right boundary
      (max_xmax) of all the boxes.
  (4) if max_ymax > output_height or max_xmax > output_width (some boxes
      don't fit in the output boundary), downsample all the boxes by ratio:
      min(output_height / max_ymax, output_width / max_xmax). The aspect
      ratio is not changed.

  Args:
    boxes: a tensor with a shape of [batch_size, N, 4]. The last dimension is
      the pixel coordinates in [ymin, xmin, ymax, xmax] form.
    output_boundary: two integers that represent the height and width of the
      output.

  Returns:
    The tensor [batch_size, N, 4] of the output boxes.
  """
  boxes = tf.cast(boxes, dtype=tf.float32)
  # A box is valid if it has positive extent along either axis.
  # (batch_size, num_boxes, 1)
  valid = tf.reduce_any(
      (boxes[:, :, 2:4] - boxes[:, :, 0:2]) > 0, axis=-1, keepdims=True
  )
  # Top-left boundary over valid boxes only; invalid boxes are pushed to
  # +inf so they never win the minimum.
  # (batch_size, 2)
  top_left = tf.reduce_min(
      tf.where(valid, boxes, np.inf)[:, :, 0:2],
      axis=1,
  )
  # Shift the valid boxes so the tightest enclosing corner lands on the
  # origin; zero out the invalid boxes entirely.
  # (batch_size, num_boxes, 4)
  shifted = tf.where(
      valid,
      boxes - tf.tile(top_left, [1, 2])[:, tf.newaxis, :],
      0.0,
  )
  # Bottom-right extent of the shifted boxes: (batch_size, 2)
  bottom_right = tf.reduce_max(shifted[:, :, 2:4], axis=1)
  # Per-axis ratios that would exactly fit the boxes into the boundary.
  # (batch_size, 2)
  fit_ratios = tf.constant(output_boundary, dtype=tf.float32) / bottom_right
  # Never upsample: cap the ratio at 1 so boxes that already fit are kept
  # as-is, and use the same ratio on both axes to preserve aspect ratio.
  # (batch_size,)
  scale = tf.math.minimum(tf.reduce_min(fit_ratios, axis=-1), 1.0)
  # (batch_size, num_boxes, 4)
  return shifted * scale[:, tf.newaxis, tf.newaxis]
def _count_detection_type(
    detection_type_mask: tf.Tensor,
    detection_classes: tf.Tensor,
    flattened_binned_confidence_one_hot: tf.Tensor,
    num_classes: int,
) -> tf.Tensor:
  """Counts detection type grouped by IoU thresholds, classes and confidence bins.

  Args:
    detection_type_mask: a bool tensor in shape of (batch_size,
      num_detections, num_iou_thresholds), which indicate a certain type of
      detections (e.g. true positives).
    detection_classes: a tensor in shape of (batch_size, num_detections)
      which stores the classes of the detections.
    flattened_binned_confidence_one_hot: a one-hot bool tensor in shape of
      (batch_size * num_detections, num_confidence_bins + 1) which indicates
      the binned confidence score of each detection.
    num_classes: the number of classes.

  Returns:
    A tensor in shape of (num_iou_thresholds, num_classes,
    num_confidence_bins + 1) which stores the count grouped by IoU
    thresholds, classes and confidence bins.
  """
  num_iou_thresholds = detection_type_mask.get_shape()[-1]

  # Replace the classes of the non-selected detections with -1 so that they
  # fall outside the one-hot depth below and drop out of the counts.
  # (batch_size, num_detections, num_iou_thresholds)
  selected_classes = tf.where(
      detection_type_mask, detection_classes[..., tf.newaxis], -1
  )
  # (num_iou_thresholds, batch_size * num_detections)
  classes_by_threshold = tf.transpose(
      tf.reshape(selected_classes, [-1, num_iou_thresholds])
  )
  # (num_iou_thresholds, num_classes, batch_size * num_detections)
  classes_one_hot = tf.one_hot(
      classes_by_threshold, depth=num_classes, axis=1
  )
  # Collapse the two leading axes so a single (sparse) matmul produces the
  # per-threshold, per-class histogram over confidence bins.
  # (num_iou_thresholds * num_classes, batch_size * num_detections)
  classes_one_hot = tf.reshape(
      classes_one_hot,
      [num_iou_thresholds * num_classes, -1],
  )
  # (num_iou_thresholds * num_classes, num_confidence_bins + 1)
  counts = tf.matmul(
      classes_one_hot,
      tf.cast(flattened_binned_confidence_one_hot, tf.float32),
      a_is_sparse=True,
      b_is_sparse=True,
  )
  # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
  counts = tf.reshape(counts, [num_iou_thresholds, num_classes, -1])
  # Clears the count of class 0 (background).
  counts *= 1.0 - tf.eye(num_classes, 1, dtype=counts.dtype)
  return counts
class InstanceMetrics(tf_keras.metrics.Metric):
  """Reports the metrics of instance detection & segmentation."""

  def __init__(
      self,
      num_classes: int,
      use_masks: bool = False,
      iou_thresholds: Tuple[float, ...] = (0.5,),
      confidence_thresholds: Tuple[float, ...] = (),
      num_confidence_bins: int = 1000,
      mask_output_boundary: Tuple[int, int] = (640, 640),
      matching_algorithm: Optional[MatchingAlgorithm] = None,
      average_precision_algorithms: Optional[
          Dict[str, AveragePrecision]
      ] = None,
      name: Optional[str] = None,
      dtype: Optional[Union[str, tf.dtypes.DType]] = tf.float32,
      **kwargs
  ):
    """Initializes InstanceMetrics.

    Args:
      num_classes: the number of classes.
      use_masks: if true, use the masks of the instances when calculating the
        metrics, otherwise use the boxes.
      iou_thresholds: a sequence of IoU thresholds over which to calculate
        the instance metrics.
      confidence_thresholds: a sequence of confidence thresholds. If set,
        also report precision and recall at each confidence threshold,
        otherwise, only report average precision.
      num_confidence_bins: the number of confidence bins used for bin sort.
      mask_output_boundary: two integers that represent the height and width
        of the boundary where the resized instance masks are pasted. For each
        example, if any of the detection or ground truth boxes is out of the
        boundary, shift and resize all the detection and ground truth boxes
        of the example to fit them into the boundary. The output boundary of
        the pasted masks can be smaller than the real image size for
        accelerating the calculation.
      matching_algorithm: the algorithm which matches detections to ground
        truths.
      average_precision_algorithms: the algorithms which compute average
        precision from P-R curve. The keys are used in the metrics results.
      name: the name of the metric instance.
      dtype: data type of the metric result.
      **kwargs: Additional keywords arguments.
    """
    super().__init__(name=name, dtype=dtype, **kwargs)
    self._num_classes = num_classes
    self._use_masks = use_masks
    self._iou_thresholds = iou_thresholds
    self._confidence_thresholds = confidence_thresholds
    self._num_iou_thresholds = len(iou_thresholds)
    self._num_confidence_bins = num_confidence_bins
    self._mask_output_boundary = mask_output_boundary

    # Default to COCO-style matching and COCO-style AP when the caller does
    # not supply specific algorithms.
    if not matching_algorithm:
      self._matching_algorithm = COCOMatchingAlgorithm(iou_thresholds)
    else:
      self._matching_algorithm = matching_algorithm
    if not average_precision_algorithms:
      self._average_precision_algorithms = {'ap': COCOAveragePrecision()}
    else:
      self._average_precision_algorithms = average_precision_algorithms

    # Variables accumulated across update_state calls:
    # tp_count / fp_count are histograms of true/false positives over
    # (iou_threshold, class, confidence_bin); gt_count is per class.
    self.tp_count = self.add_weight(
        'tp_count',
        shape=[
            self._num_iou_thresholds,
            self._num_classes,
            self._num_confidence_bins + 1,
        ],
        initializer='zeros',
        dtype=tf.float32,
    )
    self.fp_count = self.add_weight(
        'fp_count',
        shape=[
            self._num_iou_thresholds,
            self._num_classes,
            self._num_confidence_bins + 1,
        ],
        initializer='zeros',
        dtype=tf.float32,
    )
    self.gt_count = self.add_weight(
        'gt_count',
        shape=[self._num_classes],
        initializer='zeros',
        dtype=tf.float32,
    )

  def get_config(self) -> Dict[str, Any]:
    """Returns the serializable config of the metric."""
    return {
        'num_classes': self._num_classes,
        'use_masks': self._use_masks,
        'iou_thresholds': self._iou_thresholds,
        'confidence_thresholds': self._confidence_thresholds,
        'num_confidence_bins': self._num_confidence_bins,
        'mask_output_boundary': self._mask_output_boundary,
        'matching_algorithm': self._matching_algorithm,
        'average_precision_algorithms': self._average_precision_algorithms,
        'name': self.name,
        'dtype': self.dtype,
    }

  def reset_state(self):
    """Resets all of the metric state variables."""
    self.tp_count.assign(tf.zeros_like(self.tp_count))
    self.fp_count.assign(tf.zeros_like(self.fp_count))
    self.gt_count.assign(tf.zeros_like(self.gt_count))

  def update_state(
      self, y_true: Dict[str, tf.Tensor], y_pred: Dict[str, tf.Tensor]
  ):
    """Accumulates TP/FP/ground-truth counts from one batch of predictions.

    Args:
      y_true: a dict with keys 'boxes', 'classes', 'image_info', and
        optionally 'is_crowds' and (when using masks) 'masks'.
      y_pred: a dict with keys 'detection_boxes', 'detection_classes',
        'detection_scores', and optionally 'detection_masks' and
        'detection_outer_boxes'.
    """
    # (batch_size, num_detections, 4) in absolute coordinates.
    detection_boxes = tf.cast(y_pred['detection_boxes'], tf.float32)
    # (batch_size, num_detections)
    detection_classes = tf.cast(y_pred['detection_classes'], tf.int32)
    # (batch_size, num_detections)
    detection_scores = tf.cast(y_pred['detection_scores'], tf.float32)
    # (batch_size, num_gts, 4) in absolute coordinates.
    gt_boxes = tf.cast(y_true['boxes'], tf.float32)
    # (batch_size, num_gts)
    gt_classes = tf.cast(y_true['classes'], tf.int32)
    # (batch_size, num_gts); when absent, treat all ground truths as
    # non-crowd.
    if 'is_crowds' in y_true:
      gt_is_crowd = tf.cast(y_true['is_crowds'], tf.bool)
    else:
      gt_is_crowd = tf.zeros_like(gt_classes, dtype=tf.bool)

    # Undo the preprocessing image scale so detection boxes are in the same
    # coordinate space as the ground truth boxes.
    # NOTE(review): assumes image_info[:, 2, :] holds the y/x scale factors —
    # confirm against the input pipeline.
    image_scale = tf.tile(y_true['image_info'][:, 2:3, :], multiples=[1, 1, 2])
    detection_boxes = detection_boxes / tf.cast(
        image_scale, dtype=detection_boxes.dtype
    )

    # Step 1: Computes IoUs between the detections and the non-crowd ground
    # truths and IoAs between the detections and the crowd ground truths.
    if not self._use_masks:
      # (batch_size, num_detections, num_gts)
      detection_to_gt_ious = box_ops.bbox_overlap(detection_boxes, gt_boxes)
      detection_to_gt_ioas = box_ops.bbox_intersection_over_area(
          detection_boxes, gt_boxes
      )
    else:
      # Use outer boxes to generate the masks if available.
      if 'detection_outer_boxes' in y_pred:
        detection_boxes = tf.cast(y_pred['detection_outer_boxes'], tf.float32)

      # (batch_size, num_detections, mask_height, mask_width)
      detection_masks = tf.cast(y_pred['detection_masks'], tf.float32)
      # (batch_size, num_gts, gt_mask_height, gt_mask_width)
      gt_masks = tf.cast(y_true['masks'], tf.float32)

      num_detections = detection_boxes.get_shape()[1]
      # Shift/rescale detection and ground-truth boxes together so they share
      # one coordinate transform and all fit into the mask output boundary.
      # (batch_size, num_detections + num_gts, 4)
      all_boxes = _shift_and_rescale_boxes(
          tf.concat([detection_boxes, gt_boxes], axis=1),
          self._mask_output_boundary,
      )
      detection_boxes = all_boxes[:, :num_detections, :]
      gt_boxes = all_boxes[:, num_detections:, :]
      # (batch_size, num_detections, num_gts)
      detection_to_gt_ious, detection_to_gt_ioas = (
          mask_ops.instance_masks_overlap(
              detection_boxes,
              detection_masks,
              gt_boxes,
              gt_masks,
              output_size=self._mask_output_boundary,
          )
      )
    # Zero out crowd entries in the IoUs (crowds are not matched as TPs) and
    # keep only crowd entries in the IoAs (used for FP suppression below).
    # (batch_size, num_detections, num_gts)
    detection_to_gt_ious = tf.where(
        gt_is_crowd[:, tf.newaxis, :], 0.0, detection_to_gt_ious
    )
    detection_to_crowd_ioas = tf.where(
        gt_is_crowd[:, tf.newaxis, :], detection_to_gt_ioas, 0.0
    )

    # Step 2: counts true positives grouped by IoU thresholds, classes and
    # confidence bins.
    # (batch_size, num_detections, num_iou_thresholds)
    detection_is_tp, _ = self._matching_algorithm(
        detection_to_gt_ious, detection_classes, detection_scores, gt_classes
    )
    # Bin-sort the confidence scores; assumes scores lie in [0, 1] so the bin
    # index falls in [0, num_confidence_bins].
    # (batch_size * num_detections,)
    flattened_binned_confidence = tf.reshape(
        tf.cast(detection_scores * self._num_confidence_bins, tf.int32), [-1]
    )
    # (batch_size * num_detections, num_confidence_bins + 1)
    flattened_binned_confidence_one_hot = tf.one_hot(
        flattened_binned_confidence, self._num_confidence_bins + 1, axis=1
    )
    # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
    tp_count = _count_detection_type(
        detection_is_tp,
        detection_classes,
        flattened_binned_confidence_one_hot,
        self._num_classes,
    )

    # Step 3: Counts false positives grouped by IoU thresholds, classes and
    # confidence bins.
    # False positive: detection is not true positive (see above) and not part
    # of the crowd ground truth with the same class.
    # (batch_size, num_detections, num_gts, num_iou_thresholds)
    detection_matches_crowd = (
        (detection_to_crowd_ioas[..., tf.newaxis] > self._iou_thresholds)
        & (
            detection_classes[:, :, tf.newaxis, tf.newaxis]
            == gt_classes[:, tf.newaxis, :, tf.newaxis]
        )
        & (detection_classes[:, :, tf.newaxis, tf.newaxis] > 0)
    )
    # (batch_size, num_detections, num_iou_thresholds)
    detection_matches_any_crowd = tf.reduce_any(
        detection_matches_crowd & ~detection_is_tp[:, :, tf.newaxis, :], axis=2
    )
    detection_is_fp = ~detection_is_tp & ~detection_matches_any_crowd
    # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
    fp_count = _count_detection_type(
        detection_is_fp,
        detection_classes,
        flattened_binned_confidence_one_hot,
        self._num_classes,
    )

    # Step 4: Counts non-crowd groundtruths grouped by classes. Crowd ground
    # truths are mapped to class -1, which falls outside the one-hot depth
    # and is therefore excluded from the count.
    # (num_classes, )
    gt_count = tf.reduce_sum(
        tf.one_hot(
            tf.where(gt_is_crowd, -1, gt_classes), self._num_classes, axis=-1
        ),
        axis=[0, 1],
    )
    # Clears the count of class 0 (background).
    gt_count *= 1.0 - tf.eye(1, self._num_classes, dtype=gt_count.dtype)[0]

    # Accumulates the variables.
    self.fp_count.assign_add(tf.cast(fp_count, self.fp_count.dtype))
    self.tp_count.assign_add(tf.cast(tp_count, self.tp_count.dtype))
    self.gt_count.assign_add(tf.cast(gt_count, self.gt_count.dtype))

  def result(self) -> Dict[str, tf.Tensor]:
    """Returns the metrics values as a dict.

    Returns:
      A `dict` containing:
        'ap': a float tensor in shape (num_iou_thresholds, num_classes) which
          stores the average precision of each class at different IoU
          thresholds.
        'precision': a float tensor in shape (num_confidence_thresholds,
          num_iou_thresholds, num_classes) which stores the precision of each
          class at different confidence thresholds & IoU thresholds.
        'recall': a float tensor in shape (num_confidence_thresholds,
          num_iou_thresholds, num_classes) which stores the recall of each
          class at different confidence thresholds & IoU thresholds.
        'valid_classes': a bool tensor in shape (num_classes,). If False,
          there is no instance of the class in the ground truth.
    """
    result = {
        # (num_classes,)
        'valid_classes': self.gt_count != 0,
    }

    # Reverse cumulative sums over the confidence bins: entry i counts the
    # detections whose confidence falls in bin i or higher.
    # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
    tp_count_cum_by_confidence = tf.math.cumsum(
        self.tp_count, axis=-1, reverse=True
    )
    # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
    fp_count_cum_by_confidence = tf.math.cumsum(
        self.fp_count, axis=-1, reverse=True
    )

    # divide_no_nan keeps the result at 0 where there are no detections or no
    # ground truths.
    # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
    precisions = tf.math.divide_no_nan(
        tp_count_cum_by_confidence,
        tp_count_cum_by_confidence + fp_count_cum_by_confidence,
    )
    # (num_iou_thresholds, num_classes, num_confidence_bins + 1)
    recalls = tf.math.divide_no_nan(
        tp_count_cum_by_confidence, self.gt_count[..., tf.newaxis]
    )

    if self._confidence_thresholds:
      # If confidence_thresholds is set, reports precision and recall at each
      # confidence threshold. The threshold is mapped to its bin index.
      confidence_thresholds = tf.cast(
          tf.constant(self._confidence_thresholds, dtype=tf.float32)
          * self._num_confidence_bins,
          dtype=tf.int32,
      )
      # (num_confidence_thresholds, num_iou_thresholds, num_classes)
      result['precisions'] = tf.gather(
          tf.transpose(precisions, [2, 0, 1]), confidence_thresholds
      )
      result['recalls'] = tf.gather(
          tf.transpose(recalls, [2, 0, 1]), confidence_thresholds
      )

    # Reverse along the confidence-bin axis so recalls are in ascending
    # order, matching the default (recalls_desc=False) expectation of the AP
    # algorithms.
    precisions = tf.reverse(precisions, axis=[-1])
    recalls = tf.reverse(recalls, axis=[-1])
    result.update(
        {
            # (num_iou_thresholds, num_classes)
            key: ap_algorithm(precisions, recalls)
            for key, ap_algorithm in self._average_precision_algorithms.items()
        }
    )
    return result

  def get_average_precision_metrics_keys(self):
    """Gets the keys of the average precision metrics in the results."""
    return self._average_precision_algorithms.keys()