|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| """Metrics for instance detection & segmentation."""
|
|
|
| from typing import Any, Dict, Optional, Tuple, Union
|
|
|
| import numpy as np
|
| import tensorflow as tf, tf_keras
|
|
|
| from official.vision.ops import box_ops
|
| from official.vision.ops import mask_ops
|
|
|
|
|
class AveragePrecision(tf_keras.layers.Layer):
  """Abstract layer that reduces a precision-recall curve to an AP value."""

  def __init__(self, *args, **kwargs):
    """Initializes the layer, pinning the computation dtype to float32."""
    # Precision/recall inputs are probabilities; fixing float32 keeps the
    # reduction numerically consistent regardless of the model dtype policy.
    super().__init__(*args, dtype=tf.float32, **kwargs)

  def call(self, precisions, recalls):
    """Computes average precision; implemented by subclasses."""
    raise NotImplementedError
|
|
|
|
|
class COCOAveragePrecision(AveragePrecision):
  """COCO-style average precision (101-point interpolation).

  COCO defines AP as the mean of interpolated precisions sampled at a set of
  101 equally spaced recall points [0, 0.01, ..., 1]. The interpolated
  precision at a recall point r is the maximum precision achieved at any
  recall r' >= r. The VOC challenges before 2010 used a similar method with
  only 11 recall points [0, 0.1, ..., 1].
  """

  def __init__(
      self, num_recall_eval_points: int = 101, recalls_desc: bool = False
  ):
    """Initialization for COCOAveragePrecision.

    Args:
      num_recall_eval_points: the number of equally spaced recall points used
        for interpolating the precisions.
      recalls_desc: If true, the recalls are in descending order.
    """
    super().__init__()
    self._num_recall_eval_points = num_recall_eval_points
    self._recalls_desc = recalls_desc

  def get_config(self) -> Dict[str, Any]:
    """Returns the serializable config of this layer."""
    return {
        'num_recall_eval_points': self._num_recall_eval_points,
        'recalls_desc': self._recalls_desc,
    }

  def call(self, precisions: tf.Tensor, recalls: tf.Tensor) -> tf.Tensor:
    """Computes average precision.

    Args:
      precisions: a tensor in shape (dim_0, ..., num_confidences) storing
        precision values at different confidence thresholds, with arbitrary
        numbers of leading dimensions.
      recalls: a tensor in shape (dim_0, ..., num_confidences) storing recall
        values at different confidence thresholds, with arbitrary numbers of
        leading dimensions.

    Returns:
      A tensor in shape (dim_0, ...) storing the area under the P-R curve.
    """
    # tf.convert_to_tensor is a no-op on tensors, so this covers both
    # tensor and array-like inputs.
    precisions = tf.convert_to_tensor(precisions)
    recalls = tf.convert_to_tensor(recalls)

    # Normalize to ascending recall order for the interpolation below.
    if self._recalls_desc:
      precisions = tf.reverse(precisions, axis=[-1])
      recalls = tf.reverse(recalls, axis=[-1])

    eval_points = tf.linspace(0.0, 1.0, self._num_recall_eval_points)

    # For each evaluation point r, mask out the precisions whose recall does
    # not reach r, then take the max over the confidence axis: that is the
    # interpolated precision at r.
    reaches_point = recalls[..., tf.newaxis, :] >= eval_points[:, tf.newaxis]
    interpolated = tf.reduce_max(
        precisions[..., tf.newaxis, :]
        * tf.cast(reaches_point, precisions.dtype),
        axis=-1,
    )

    # AP is the mean of the interpolated precisions over all recall points.
    return tf.reduce_mean(interpolated, axis=-1)
|
|
|
|
|
class VOC2010AveragePrecision(AveragePrecision):
  """Average precision in VOC 2010 style.

  Since VOC 2010, first compute an approximation of the measured P-R curve
  with precision monotonically decreasing, by setting the precision for recall
  r to the maximum precision obtained for any recall r' >= r. Then compute the
  AP as the area under this curve by numerical integration.
  """

  def __init__(self, recalls_desc: bool = False):
    """Initialization for VOC10AveragePrecision.

    Args:
      recalls_desc: If true, the recalls are in descending order.
    """
    super().__init__()
    self._recalls_desc = recalls_desc

  def get_config(self) -> Dict[str, Any]:
    """Returns the serializable config of this layer."""
    return {
        'recalls_desc': self._recalls_desc,
    }

  def call(self, precisions: tf.Tensor, recalls: tf.Tensor) -> tf.Tensor:
    """Computes average precision.

    Args:
      precisions: a tensor in shape (dim_0, ..., num_confidences) which stores a
        list of precision values at different confidence thresholds with
        arbitrary numbers of leading dimensions.
      recalls: a tensor in shape (dim_0, ..., num_confidences) which stores a
        list of recall values at different confidence threshold with arbitrary
        numbers of leading dimensions.

    Returns:
      A tensor in shape (dim_0, ...), which stores the area under P-R curve.
    """
    p = precisions
    r = recalls

    if not isinstance(p, tf.Tensor):
      p = tf.convert_to_tensor(p)
    if not isinstance(r, tf.Tensor):
      r = tf.convert_to_tensor(r)

    # Normalize to ascending recall order so the reversed running-max scan
    # below rectifies the curve from high recall toward low recall.
    if self._recalls_desc:
      p = tf.reverse(p, axis=[-1])
      r = tf.reverse(r, axis=[-1])

    axis_indices = list(range(len(p.get_shape())))

    # Move the confidence axis to the front because tf.scan iterates over the
    # leading dimension.
    p = tf.transpose(p, np.roll(axis_indices, 1))

    # Make precision monotonically decreasing w.r.t. ascending recall: each
    # entry becomes the max precision over itself and all entries after it
    # (reverse=True scans from the end).
    p = tf.scan(
        tf.maximum, elems=p, initializer=tf.reduce_min(p, axis=0), reverse=True
    )

    # Restore the original axis layout.
    p = tf.transpose(p, np.roll(axis_indices, -1))

    # Prepend recall=0 and compute per-step recall increments; the AP is the
    # rectangular numerical integration of the rectified P-R curve.
    r = tf.concat([tf.zeros_like(r[..., 0:1]), r], axis=-1)
    delta_r = tf.roll(r, shift=-1, axis=-1) - r

    # delta_r[..., :-1] drops the wrap-around term produced by tf.roll.
    return tf.reduce_sum(p * delta_r[..., :-1], axis=-1)
|
|
|
|
|
class MatchingAlgorithm(tf_keras.layers.Layer):
  """Abstract layer that assigns detections to ground-truth instances."""

  def __init__(self, *args, **kwargs):
    """Initializes the layer, pinning the computation dtype to float32."""
    super().__init__(*args, dtype=tf.float32, **kwargs)

  def call(
      self,
      detection_to_gt_ious: tf.Tensor,
      detection_classes: tf.Tensor,
      detection_scores: tf.Tensor,
      gt_classes: tf.Tensor,
  ):
    """Matches detections to ground truths; implemented by subclasses."""
    raise NotImplementedError
|
|
|
|
|
class COCOMatchingAlgorithm(MatchingAlgorithm):
  """The detection matching algorithm used in COCO."""

  def __init__(self, iou_thresholds: Tuple[float, ...]):
    """Initialization for COCOMatchingAlgorithm.

    Args:
      iou_thresholds: a list of IoU thresholds.
    """
    super().__init__()
    self._iou_thresholds = iou_thresholds

  def get_config(self) -> Dict[str, Any]:
    """Returns the serializable config of this layer."""
    return {
        'iou_thresholds': self._iou_thresholds,
    }

  def call(
      self,
      detection_to_gt_ious: tf.Tensor,
      detection_classes: tf.Tensor,
      detection_scores: tf.Tensor,
      gt_classes: tf.Tensor,
  ) -> Tuple[tf.Tensor, tf.Tensor]:
    """Matches detections to ground truths.

    This is the matching algorithm used in COCO. First, sort all the detections
    based on the scores from high to low. Then for each detection, iterates
    through all ground truth. The unmatched ground truth with the highest IoU
    greater than the threshold is matched to the detection.

    Args:
      detection_to_gt_ious: a tensor in shape of (batch_size, num_detections,
        num_gts) which stores the IoUs for each pair of detection and ground
        truth.
      detection_classes: a tensor in shape of (batch_size, num_detections) which
        stores the classes of the detections.
      detection_scores: a tensor in shape of (batch_size, num_detections) which
        stores the scores of the detections.
      gt_classes: a tensor in shape of (batch_size, num_gts) which stores the
        classes of the ground truth boxes.

    Returns:
      Two bool tensors in shape of (batch_size, num_detections,
      num_iou_thresholds) and (batch_size, num_gts, num_iou_thresholds) which
      indicates whether the detections and ground truths are true positives at
      different IoU thresholds.
    """
    batch_size = tf.shape(detection_classes)[0]
    num_detections = detection_classes.get_shape()[1]
    num_gts = gt_classes.get_shape()[1]
    num_iou_thresholds = len(self._iou_thresholds)

    # Process detections greedily from the highest score to the lowest, as
    # COCO's evaluation protocol prescribes.
    sorted_detection_indices = tf.argsort(
        detection_scores, axis=1, direction='DESCENDING'
    )
    sorted_detection_classes = tf.gather(
        detection_classes, sorted_detection_indices, batch_dims=1
    )
    sorted_detection_to_gt_ious = tf.gather(
        detection_to_gt_ious, sorted_detection_indices, batch_dims=1
    )

    # Loop state: (current sorted-detection index, per-detection TP flags,
    # per-ground-truth matched flags), each tracked per IoU threshold.
    init_loop_vars = (
        0,
        tf.zeros(
            [batch_size, num_detections, num_iou_thresholds], dtype=tf.bool
        ),
        tf.zeros(
            [batch_size, num_gts, num_iou_thresholds], dtype=tf.bool
        ),
    )

    def _match_detection_to_gt_loop_body(
        i: int, detection_is_tp: tf.Tensor, gt_is_tp: tf.Tensor
    ) -> Tuple[int, tf.Tensor, tf.Tensor]:
      """Iterates the sorted detections and matches to the ground truths."""
      # IoUs between the i-th highest scoring detection and every gt:
      # shape (batch_size, num_gts).
      gt_ious = sorted_detection_to_gt_ious[:, i, :]

      # A ground truth is a candidate match when it is still unmatched, its
      # IoU exceeds the threshold, and its (positive) class agrees with the
      # detection's class. Class 0 is treated as background and never matches.
      gt_matches_detection = (
          ~gt_is_tp
          & (gt_ious[:, :, tf.newaxis] > self._iou_thresholds)
          & (
              (sorted_detection_classes[:, i][:, tf.newaxis] == gt_classes)
              & (gt_classes > 0)
          )[:, :, tf.newaxis]
      )

      # Among the candidates, keep only the ground truth with the highest IoU
      # (per batch element and per threshold).
      matched_gt_with_max_iou = tf.argmax(
          tf.cast(gt_matches_detection, gt_ious.dtype)
          * gt_ious[:, :, tf.newaxis],
          axis=1,
          output_type=tf.int32,
      )
      gt_matches_detection &= tf.one_hot(
          matched_gt_with_max_iou,
          depth=num_gts,
          on_value=True,
          off_value=False,
          axis=1,
      )

      # Mark this detection — at its original (unsorted) index — as a true
      # positive for every threshold at which it matched some ground truth.
      detection_is_tp |= (
          tf.reduce_any(gt_matches_detection, axis=1, keepdims=True)
          & tf.one_hot(
              sorted_detection_indices[:, i],
              depth=num_detections,
              on_value=True,
              off_value=False,
              axis=-1,
          )[:, :, tf.newaxis]
      )
      # Re-assert static shapes so the while_loop invariants stay fixed.
      detection_is_tp.set_shape([None, num_detections, num_iou_thresholds])

      # Mark the matched ground truths so later (lower-scored) detections
      # cannot claim them again.
      gt_is_tp |= gt_matches_detection
      gt_is_tp.set_shape([None, num_gts, num_iou_thresholds])

      return (i + 1, detection_is_tp, gt_is_tp)

    _, detection_is_tp_result, gt_is_tp_result = tf.while_loop(
        cond=lambda i, *_: i < num_detections,
        body=_match_detection_to_gt_loop_body,
        loop_vars=init_loop_vars,
        parallel_iterations=32,
        maximum_iterations=num_detections,
    )
    return detection_is_tp_result, gt_is_tp_result
|
|
|
|
|
def _shift_and_rescale_boxes(
    boxes: tf.Tensor,
    output_boundary: Tuple[int, int],
) -> tf.Tensor:
  """Shift and rescale the boxes to fit in the output boundary.

  The output boundary of the boxes can be smaller than the original image size
  for accelerating the downstream calculations (dynamic mask resizing, mask
  IoU, etc.).

  For each image of the batch:
  (1) find the upper boundary (min_ymin) and the left boundary (min_xmin) of
      all the boxes.
  (2) shift all the boxes up min_ymin pixels and left min_xmin pixels.
  (3) find the new lower boundary (max_ymax) and the right boundary (max_xmax)
      of all the boxes.
  (4) if max_ymax > output_height or max_xmax > output_width (some boxes don't
      fit in the output boundary), downsample all the boxes by ratio:
      min(output_height / max_ymax, output_width / max_xmax). The aspect ratio
      is not changed.

  Args:
    boxes: a tensor with a shape of [batch_size, N, 4]. The last dimension is
      the pixel coordinates in [ymin, xmin, ymax, xmax] form.
    output_boundary: two integers that represent the height and width of the
      output.

  Returns:
    The tensor [batch_size, N, 4] of the output boxes.
  """
  boxes = tf.cast(boxes, dtype=tf.float32)

  # A box is valid when it has positive height or width; degenerate boxes are
  # excluded from the boundary computation and zeroed out in the result.
  valid = tf.reduce_any(
      (boxes[:, :, 2:4] - boxes[:, :, 0:2]) > 0, axis=-1, keepdims=True
  )

  # Per-image top-left corner over valid boxes only; invalid boxes are pushed
  # to +inf so they never win the minimum.
  top_left = tf.reduce_min(
      tf.where(valid, boxes, np.inf)[:, :, 0:2],
      axis=1,
  )

  # Shift every valid box so the top-left-most corner moves to the origin.
  shift = tf.concat([top_left, top_left], axis=-1)[:, tf.newaxis, :]
  boxes = tf.where(valid, boxes - shift, 0.0)

  # After shifting, measure how far the boxes extend and shrink uniformly
  # (preserving aspect ratio) if they spill over the output boundary.
  max_ymax = tf.reduce_max(boxes[:, :, 2], axis=1)
  max_xmax = tf.reduce_max(boxes[:, :, 3], axis=1)
  scale = tf.math.minimum(
      tf.math.minimum(
          output_boundary[0] / max_ymax, output_boundary[1] / max_xmax
      ),
      1.0,
  )

  return boxes * scale[:, tf.newaxis, tf.newaxis]
|
|
|
|
|
def _count_detection_type(
    detection_type_mask: tf.Tensor,
    detection_classes: tf.Tensor,
    flattened_binned_confidence_one_hot: tf.Tensor,
    num_classes: int,
) -> tf.Tensor:
  """Counts detection type grouped by IoU thresholds, classes and confidence bins.

  Args:
    detection_type_mask: a bool tensor in shape of (batch_size, num_detections,
      num_iou_thresholds), which indicate a certain type of detections (e.g.
      true postives).
    detection_classes: a tensor in shape of (batch_size, num_detections) which
      stores the classes of the detections.
    flattened_binned_confidence_one_hot: a one-hot bool tensor in shape of
      (batch_size * num_detections, num_confidence_bins + 1) which indicates the
      binned confidence score of each detection.
    num_classes: the number of classes.

  Returns:
    A tensor in shape of (num_iou_thresholds, num_classes,
    num_confidence_bins + 1) which stores the count grouped by IoU thresholds,
    classes and confidence bins.
  """
  num_iou_thresholds = detection_type_mask.get_shape()[-1]

  # Replace the class of any detection not of the requested type with -1, so
  # its one-hot encoding below becomes all zeros and it is not counted.
  masked_classes = tf.where(
      detection_type_mask, detection_classes[..., tf.newaxis], -1
  )
  # Shape: (num_iou_thresholds, batch_size * num_detections).
  flattened_masked_classes = tf.transpose(
      tf.reshape(masked_classes, [-1, num_iou_thresholds])
  )
  # One-hot over the class axis; -1 entries yield all-zero columns.
  flattened_masked_classes_one_hot = tf.one_hot(
      flattened_masked_classes, depth=num_classes, axis=1
  )
  # Fold (threshold, class) into one leading axis for a single matmul.
  flattened_masked_classes_one_hot = tf.reshape(
      flattened_masked_classes_one_hot,
      [num_iou_thresholds * num_classes, -1],
  )

  # The matmul of the two one-hot matrices aggregates, for every
  # (threshold, class) row, the number of detections in each confidence bin.
  count = tf.matmul(
      flattened_masked_classes_one_hot,
      tf.cast(flattened_binned_confidence_one_hot, tf.float32),
      a_is_sparse=True,
      b_is_sparse=True,
  )
  count = tf.reshape(count, [num_iou_thresholds, num_classes, -1])

  # Clear class 0 (background): the (num_classes, 1) mask broadcasts over the
  # confidence-bin axis and zeroes only the first class row.
  count *= 1.0 - tf.eye(num_classes, 1, dtype=count.dtype)
  return count
|
|
|
|
|
class InstanceMetrics(tf_keras.metrics.Metric):
  """Reports the metrics of instance detection & segmentation."""

  def __init__(
      self,
      num_classes: int,
      use_masks: bool = False,
      iou_thresholds: Tuple[float, ...] = (0.5,),
      confidence_thresholds: Tuple[float, ...] = (),
      num_confidence_bins: int = 1000,
      mask_output_boundary: Tuple[int, int] = (640, 640),
      matching_algorithm: Optional[MatchingAlgorithm] = None,
      average_precision_algorithms: Optional[
          Dict[str, AveragePrecision]
      ] = None,
      name: Optional[str] = None,
      dtype: Optional[Union[str, tf.dtypes.DType]] = tf.float32,
      **kwargs
  ):
    """Initialization for AveragePrecision.

    Args:
      num_classes: the number of classes.
      use_masks: if true, use the masks of the instances when calculating the
        metrics, otherwise use the boxes.
      iou_thresholds: a sequence of IoU thresholds over which to calculate the
        instance metrics.
      confidence_thresholds: a sequence of confidence thresholds. If set, also
        report precision and recall at each confidence threshold, otherwise,
        only report average precision.
      num_confidence_bins: the number of confidence bins used for bin sort.
      mask_output_boundary: two integers that represent the height and width of
        the boundary where the resized instance masks are pasted. For each
        example, if any of the detection or ground truth boxes is out of the
        boundary, shift and resize all the detection and ground truth boxes of
        the example to fit them into the boundary. The output boundary of the
        pasted masks can be smaller than the real image size for accelerating
        the calculation.
      matching_algorithm: the algorithm which matches detections to ground
        truths.
      average_precision_algorithms: the algorithms which compute average
        precision from P-R curve. The keys are used in the metrics results.
      name: the name of the metric instance.
      dtype: data type of the metric result.
      **kwargs: Additional keywords arguments.
    """
    super().__init__(name=name, dtype=dtype, **kwargs)
    self._num_classes = num_classes
    self._use_masks = use_masks
    self._iou_thresholds = iou_thresholds
    self._confidence_thresholds = confidence_thresholds
    self._num_iou_thresholds = len(iou_thresholds)
    self._num_confidence_bins = num_confidence_bins
    self._mask_output_boundary = mask_output_boundary
    # Default to COCO-style matching and COCO-style AP when the caller does
    # not provide algorithms.
    if not matching_algorithm:
      self._matching_algorithm = COCOMatchingAlgorithm(iou_thresholds)
    else:
      self._matching_algorithm = matching_algorithm
    if not average_precision_algorithms:
      self._average_precision_algorithms = {'ap': COCOAveragePrecision()}
    else:
      self._average_precision_algorithms = average_precision_algorithms

    # Accumulators: true/false positives are counted per (IoU threshold,
    # class, confidence bin); ground truths are counted per class.
    self.tp_count = self.add_weight(
        'tp_count',
        shape=[
            self._num_iou_thresholds,
            self._num_classes,
            self._num_confidence_bins + 1,
        ],
        initializer='zeros',
        dtype=tf.float32,
    )
    self.fp_count = self.add_weight(
        'fp_count',
        shape=[
            self._num_iou_thresholds,
            self._num_classes,
            self._num_confidence_bins + 1,
        ],
        initializer='zeros',
        dtype=tf.float32,
    )
    self.gt_count = self.add_weight(
        'gt_count',
        shape=[self._num_classes],
        initializer='zeros',
        dtype=tf.float32,
    )

  def get_config(self) -> Dict[str, Any]:
    """Returns the serializable config of the metric."""
    return {
        'num_classes': self._num_classes,
        'use_masks': self._use_masks,
        'iou_thresholds': self._iou_thresholds,
        'confidence_thresholds': self._confidence_thresholds,
        'num_confidence_bins': self._num_confidence_bins,
        'mask_output_boundary': self._mask_output_boundary,
        'matching_algorithm': self._matching_algorithm,
        'average_precision_algorithms': self._average_precision_algorithms,
        'name': self.name,
        'dtype': self.dtype,
    }

  def reset_state(self):
    """Resets all of the metric state variables."""
    self.tp_count.assign(tf.zeros_like(self.tp_count))
    self.fp_count.assign(tf.zeros_like(self.fp_count))
    self.gt_count.assign(tf.zeros_like(self.gt_count))

  def update_state(
      self, y_true: Dict[str, tf.Tensor], y_pred: Dict[str, tf.Tensor]
  ):
    """Accumulates TP/FP/ground-truth counts from a batch of predictions.

    Args:
      y_true: a dict of ground truth tensors. Reads 'boxes', 'classes' and
        'image_info', plus 'is_crowds' and 'masks' when present/required.
      y_pred: a dict of detection tensors. Reads 'detection_boxes',
        'detection_classes' and 'detection_scores', plus 'detection_masks'
        and 'detection_outer_boxes' when masks are used.
    """
    detection_boxes = tf.cast(y_pred['detection_boxes'], tf.float32)
    detection_classes = tf.cast(y_pred['detection_classes'], tf.int32)
    detection_scores = tf.cast(y_pred['detection_scores'], tf.float32)
    gt_boxes = tf.cast(y_true['boxes'], tf.float32)
    gt_classes = tf.cast(y_true['classes'], tf.int32)
    # Crowd regions get special treatment below; assume none if absent.
    if 'is_crowds' in y_true:
      gt_is_crowd = tf.cast(y_true['is_crowds'], tf.bool)
    else:
      gt_is_crowd = tf.zeros_like(gt_classes, dtype=tf.bool)

    # Rescale detection boxes back into the ground-truth coordinate space.
    # NOTE(review): assumes image_info[:, 2, :] holds the preprocessing scale
    # factors in (y, x) order — confirm against the input pipeline.
    image_scale = tf.tile(y_true['image_info'][:, 2:3, :], multiples=[1, 1, 2])
    detection_boxes = detection_boxes / tf.cast(
        image_scale, dtype=detection_boxes.dtype
    )

    if not self._use_masks:
      # Box mode: IoU for matching; intersection-over-area for crowd checks.
      detection_to_gt_ious = box_ops.bbox_overlap(detection_boxes, gt_boxes)
      detection_to_gt_ioas = box_ops.bbox_intersection_over_area(
          detection_boxes, gt_boxes
      )
    else:
      # Mask mode: compute mask IoU/IoA inside a shared output boundary.
      if 'detection_outer_boxes' in y_pred:
        detection_boxes = tf.cast(y_pred['detection_outer_boxes'], tf.float32)

      detection_masks = tf.cast(y_pred['detection_masks'], tf.float32)
      gt_masks = tf.cast(y_true['masks'], tf.float32)
      num_detections = detection_boxes.get_shape()[1]
      # Shift/rescale detection and ground-truth boxes together so their
      # relative geometry is preserved inside the output boundary.
      all_boxes = _shift_and_rescale_boxes(
          tf.concat([detection_boxes, gt_boxes], axis=1),
          self._mask_output_boundary,
      )
      detection_boxes = all_boxes[:, :num_detections, :]
      gt_boxes = all_boxes[:, num_detections:, :]

      detection_to_gt_ious, detection_to_gt_ioas = (
          mask_ops.instance_masks_overlap(
              detection_boxes,
              detection_masks,
              gt_boxes,
              gt_masks,
              output_size=self._mask_output_boundary,
          )
      )

    # Crowd ground truths never count as TP matches (IoU zeroed out); their
    # IoA is kept separately to excuse detections inside crowd regions.
    detection_to_gt_ious = tf.where(
        gt_is_crowd[:, tf.newaxis, :], 0.0, detection_to_gt_ious
    )
    detection_to_crowd_ioas = tf.where(
        gt_is_crowd[:, tf.newaxis, :], detection_to_gt_ioas, 0.0
    )

    detection_is_tp, _ = self._matching_algorithm(
        detection_to_gt_ious, detection_classes, detection_scores, gt_classes
    )

    # Bin-sort the confidence scores so counts can be grouped per bin.
    # NOTE(review): assumes detection scores lie in [0, 1] — confirm.
    flattened_binned_confidence = tf.reshape(
        tf.cast(detection_scores * self._num_confidence_bins, tf.int32), [-1]
    )
    flattened_binned_confidence_one_hot = tf.one_hot(
        flattened_binned_confidence, self._num_confidence_bins + 1, axis=1
    )

    tp_count = _count_detection_type(
        detection_is_tp,
        detection_classes,
        flattened_binned_confidence_one_hot,
        self._num_classes,
    )

    # A non-TP detection is excused (neither TP nor FP) when its IoA with a
    # same-class crowd region exceeds the threshold.
    detection_matches_crowd = (
        (detection_to_crowd_ioas[..., tf.newaxis] > self._iou_thresholds)
        & (
            detection_classes[:, :, tf.newaxis, tf.newaxis]
            == gt_classes[:, tf.newaxis, :, tf.newaxis]
        )
        & (detection_classes[:, :, tf.newaxis, tf.newaxis] > 0)
    )
    detection_matches_any_crowd = tf.reduce_any(
        detection_matches_crowd & ~detection_is_tp[:, :, tf.newaxis, :], axis=2
    )
    detection_is_fp = ~detection_is_tp & ~detection_matches_any_crowd

    fp_count = _count_detection_type(
        detection_is_fp,
        detection_classes,
        flattened_binned_confidence_one_hot,
        self._num_classes,
    )

    # Count non-crowd ground truths per class: crowds are remapped to -1,
    # which one_hot encodes as all zeros and so drops from the sum.
    gt_count = tf.reduce_sum(
        tf.one_hot(
            tf.where(gt_is_crowd, -1, gt_classes), self._num_classes, axis=-1
        ),
        axis=[0, 1],
    )
    # Clear class 0 (background) from the ground-truth counts.
    gt_count *= 1.0 - tf.eye(1, self._num_classes, dtype=gt_count.dtype)[0]

    self.fp_count.assign_add(tf.cast(fp_count, self.fp_count.dtype))
    self.tp_count.assign_add(tf.cast(tp_count, self.tp_count.dtype))
    self.gt_count.assign_add(tf.cast(gt_count, self.gt_count.dtype))

  def result(self) -> Dict[str, tf.Tensor]:
    """Returns the metrics values as a dict.

    Returns:
      A `dict` containing:
        (keys of `average_precision_algorithms`, e.g. 'ap'): a float tensor in
          shape (num_iou_thresholds, num_classes) which stores the average
          precision of each class at different IoU thresholds.
        'precisions': (only when confidence thresholds are set) a float tensor
          in shape (num_confidence_thresholds, num_iou_thresholds, num_classes)
          which stores the precision of each class at different confidence
          thresholds & IoU thresholds.
        'recalls': (only when confidence thresholds are set) a float tensor in
          shape (num_confidence_thresholds, num_iou_thresholds, num_classes)
          which stores the recall of each class at different confidence
          thresholds & IoU thresholds.
        'valid_classes': a bool tensor in shape (num_classes,). If False, there
          is no instance of the class in the ground truth.
    """
    result = {
        # Classes absent from the ground truth produce undefined metrics.
        'valid_classes': self.gt_count != 0,
    }

    # Cumulative counts from the highest confidence bin downward: entry b is
    # the TP/FP count of detections with binned confidence >= b.
    tp_count_cum_by_confidence = tf.math.cumsum(
        self.tp_count, axis=-1, reverse=True
    )
    fp_count_cum_by_confidence = tf.math.cumsum(
        self.fp_count, axis=-1, reverse=True
    )

    # P-R values per (IoU threshold, class, confidence bin); divide_no_nan
    # yields 0 where there are no detections or no ground truths.
    precisions = tf.math.divide_no_nan(
        tp_count_cum_by_confidence,
        tp_count_cum_by_confidence + fp_count_cum_by_confidence,
    )
    recalls = tf.math.divide_no_nan(
        tp_count_cum_by_confidence, self.gt_count[..., tf.newaxis]
    )

    if self._confidence_thresholds:
      # Convert each confidence threshold into its bin index and report the
      # P-R values at exactly those bins.
      confidence_thresholds = tf.cast(
          tf.constant(self._confidence_thresholds, dtype=tf.float32)
          * self._num_confidence_bins,
          dtype=tf.int32,
      )
      result['precisions'] = tf.gather(
          tf.transpose(precisions, [2, 0, 1]), confidence_thresholds
      )
      result['recalls'] = tf.gather(
          tf.transpose(recalls, [2, 0, 1]), confidence_thresholds
      )

    # Reverse the bin axis so recall runs in ascending order, matching the
    # AP algorithms' default expectation (recalls_desc=False).
    precisions = tf.reverse(precisions, axis=[-1])
    recalls = tf.reverse(recalls, axis=[-1])
    result.update(
        {
            key: ap_algorithm(precisions, recalls)
            for key, ap_algorithm in self._average_precision_algorithms.items()
        }
    )
    return result

  def get_average_precision_metrics_keys(self):
    """Gets the keys of the average precision metrics in the results."""
    return self._average_precision_algorithms.keys()
|
|
|