File size: 40,903 Bytes

663494c

import json
import torch
import tqdm
from typing import List, Dict, Tuple, Callable, Union
from nuscenes import NuScenes
from pyquaternion import Quaternion
import numpy as np
from .metric_utils import min_ade, min_fde, miss_rate

from nuscenes.utils.splits import create_splits_scenes
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.prediction import PredictHelper, convert_local_coords_to_global
from nuscenes.eval.common.data_classes import EvalBox, EvalBoxes
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.data_classes import (
    DetectionMetricData,
    DetectionMetricDataList,
    DetectionMetrics,
)
from nuscenes.eval.common.utils import (
    center_distance,
    scale_iou,
    yaw_diff,
    velocity_l2,
    attr_acc,
    cummean,
)


def category_to_motion_name(category_name: str):
    """
    Collapse a generic nuScenes category into one of the coarse motion classes.
    The listed vehicle categories become "car", the four listed pedestrian categories become
    "pedestrian", and barriers as well as traffic cones become "barrier".
    :param category_name: Generic nuScenes class.
    :return: "car", "pedestrian" or "barrier", or None for a category that is not listed.
    """
    grouped_categories = (
        (
            "car",
            (
                "vehicle.bicycle",
                "vehicle.bus.bendy",
                "vehicle.bus.rigid",
                "vehicle.car",
                "vehicle.construction",
                "vehicle.motorcycle",
                "vehicle.trailer",
                "vehicle.truck",
            ),
        ),
        (
            "pedestrian",
            (
                "human.pedestrian.adult",
                "human.pedestrian.child",
                "human.pedestrian.construction_worker",
                "human.pedestrian.police_officer",
            ),
        ),
        (
            "barrier",
            (
                "movable_object.barrier",
                "movable_object.trafficcone",
            ),
        ),
    )
    # Flatten the groups into a category -> motion class lookup table.
    lookup = {
        category: motion_name
        for motion_name, categories in grouped_categories
        for category in categories
    }

    # dict.get yields None for every category that is not listed.
    return lookup.get(category_name)


def detection_prediction_category_to_motion_name(category_name: str):
    """
    Collapse a nuScenes detection class name into one of the coarse motion classes.
    All seven vehicle-like detection classes become "car", "pedestrian" stays "pedestrian",
    and "traffic_cone" as well as "barrier" become "barrier".
    :param category_name: nuScenes detection class name.
    :return: "car", "pedestrian" or "barrier", or None for a name that is not listed.
    """
    car_like = (
        "car",
        "truck",
        "construction_vehicle",
        "bus",
        "trailer",
        "motorcycle",
        "bicycle",
    )
    barrier_like = ("traffic_cone", "barrier")

    lookup = {name: "car" for name in car_like}
    lookup["pedestrian"] = "pedestrian"
    lookup.update((name, "barrier") for name in barrier_like)

    # dict.get yields None for every name that is not listed.
    return lookup.get(category_name)


class DetectionMotionMetrics(DetectionMetrics):
    """ Stores average precision and true positive metric results. Provides properties to summarize. """

    @classmethod
    def deserialize(cls, content: dict):
        """
        Initialize from serialized dictionary.
        :param content: Dict providing the keys "cfg", "eval_time", "label_aps" and "label_tp_errors".
        :return: A new instance of this class filled with the serialized values.
        """
        # DetectionConfig is not part of the imports at the top of this module, so it is imported
        # here; without it the lookup below fails with a NameError.
        from nuscenes.eval.detection.data_classes import DetectionConfig

        cfg = DetectionConfig.deserialize(content["cfg"])
        metrics = cls(cfg=cfg)
        metrics.add_runtime(content["eval_time"])

        # Average precision per (detection name, distance threshold).
        for detection_name, label_aps in content["label_aps"].items():
            for dist_th, ap in label_aps.items():
                metrics.add_label_ap(
                    detection_name=detection_name, dist_th=float(dist_th), ap=float(ap)
                )

        # True positive errors per (detection name, metric name).
        for detection_name, label_tps in content["label_tp_errors"].items():
            for metric_name, tp in label_tps.items():
                metrics.add_label_tp(
                    detection_name=detection_name, metric_name=metric_name, tp=float(tp)
                )

        return metrics


class DetectionMotionMetricDataList(DetectionMetricDataList):
    """ Collection of DetectionMotionMetricData entries addressed by (name, match-distance). """

    @classmethod
    def deserialize(cls, content: dict):
        """ Rebuild the collection from a dict whose keys have the form "<name>:<distance>". """
        restored = cls()
        for joined_key in content:
            detection_name, dist_text = joined_key.split(":")
            dist_th = float(dist_text)
            metric_data = DetectionMotionMetricData.deserialize(content[joined_key])
            restored.set(detection_name, dist_th, metric_data)
        return restored


class DetectionMotionMetricData(DetectionMetricData):
    """ Holds the accumulated and interpolated curves needed to compute the detection and motion metrics. """

    nelem = 101

    def __init__(
        self,
        recall: np.array,
        precision: np.array,
        confidence: np.array,
        trans_err: np.array,
        vel_err: np.array,
        scale_err: np.array,
        orient_err: np.array,
        attr_err: np.array,
        min_ade_err: np.array,
        min_fde_err: np.array,
        miss_rate_err: np.array,
    ):
        """ Store the curves after checking their length and the ordering of confidence and recall. """

        # Every curve has to provide exactly nelem samples.
        for curve in (
            recall,
            precision,
            confidence,
            trans_err,
            vel_err,
            scale_err,
            orient_err,
            attr_err,
            min_ade_err,
            min_fde_err,
            miss_rate_err,
        ):
            assert len(curve) == self.nelem

        # Confidences must be descending, recalls must be ascending.
        assert all(confidence == sorted(confidence, reverse=True))
        assert all(recall == sorted(recall))

        # Plain attribute assignments so that IDEs can discover the fields.
        self.recall, self.precision, self.confidence = recall, precision, confidence
        self.trans_err, self.vel_err = trans_err, vel_err
        self.scale_err, self.orient_err = scale_err, orient_err
        self.attr_err = attr_err
        self.min_ade_err, self.min_fde_err = min_ade_err, min_fde_err
        self.miss_rate_err = miss_rate_err

    def __eq__(self, other):
        """ Two instances are equal when every serialized field holds equal arrays. """
        return all(
            np.array_equal(getattr(self, key), getattr(other, key))
            for key in self.serialize()
        )

    @property
    def max_recall_ind(self):
        """ Returns index of max recall achieved. """

        # The last non-zero confidence marks the highest recall that was reached; when there are
        # no matches all confidences are zero and index 0 is reported.
        nonzero_conf = np.nonzero(self.confidence)[0]
        if len(nonzero_conf) == 0:
            return 0
        return nonzero_conf[-1]

    @property
    def max_recall(self):
        """ Returns max recall achieved. """

        best_index = self.max_recall_ind
        return self.recall[best_index]

    def serialize(self):
        """ Serialize instance into json-friendly format. """
        field_names = (
            "recall",
            "precision",
            "confidence",
            "trans_err",
            "vel_err",
            "scale_err",
            "orient_err",
            "attr_err",
            "min_ade_err",
            "min_fde_err",
            "miss_rate_err",
        )
        return {name: getattr(self, name).tolist() for name in field_names}

    @classmethod
    def deserialize(cls, content: dict):
        """ Initialize from serialized content. """

        def as_array(key):
            # Each serialized field is a plain list; turn it back into an array.
            return np.array(content[key])

        return cls(
            recall=as_array("recall"),
            precision=as_array("precision"),
            confidence=as_array("confidence"),
            trans_err=as_array("trans_err"),
            vel_err=as_array("vel_err"),
            scale_err=as_array("scale_err"),
            orient_err=as_array("orient_err"),
            attr_err=as_array("attr_err"),
            min_ade_err=as_array("min_ade_err"),
            min_fde_err=as_array("min_fde_err"),
            miss_rate_err=as_array("miss_rate_err"),
        )

    @classmethod
    def no_predictions(cls):
        """ Returns a md instance corresponding to having no predictions. """
        count = cls.nelem
        # Zero precision and confidence, every error curve set to one.
        return cls(
            recall=np.linspace(0, 1, count),
            precision=np.zeros(count),
            confidence=np.zeros(count),
            trans_err=np.ones(count),
            vel_err=np.ones(count),
            scale_err=np.ones(count),
            orient_err=np.ones(count),
            attr_err=np.ones(count),
            min_ade_err=np.ones(count),
            min_fde_err=np.ones(count),
            miss_rate_err=np.ones(count),
        )

    @classmethod
    def random_md(cls):
        """ Returns an md instance corresponding to a random results. """
        count = cls.nelem
        draw = np.random.random
        # Recall ascends and confidence descends over [0, 1]; everything else is drawn at random.
        return cls(
            recall=np.linspace(0, 1, count),
            precision=draw(count),
            confidence=np.linspace(0, 1, count)[::-1],
            trans_err=draw(count),
            vel_err=draw(count),
            scale_err=draw(count),
            orient_err=draw(count),
            attr_err=draw(count),
            min_ade_err=draw(count),
            min_fde_err=draw(count),
            miss_rate_err=draw(count),
        )


class DetectionMotionBox(DetectionBox):
    """ Detection box that additionally carries trajectories (traj) and their scores (traj_scores). """

    def __init__(
        self,
        sample_token: str = "",
        translation: Tuple[float, float, float] = (0, 0, 0),
        size: Tuple[float, float, float] = (0, 0, 0),
        rotation: Tuple[float, float, float, float] = (0, 0, 0, 0),
        velocity: Tuple[float, float] = (0, 0),
        ego_translation: Tuple[float, float, float] = (
            0,
            0,
            0,
        ),  # Translation to ego vehicle in meters.
        num_pts: int = -1,  # Nbr. LIDAR or RADAR inside the box. Only for gt boxes.
        detection_name: str = "car",  # The class name used in the detection challenge.
        detection_score: float = -1.0,  # GT samples do not have a score.
        attribute_name: str = "",  # Box attribute. Each box can have at most 1 attribute.
        traj=None,
        traj_scores=None,
    ):
        """
        :param traj: Trajectories attached to the box (None if absent).
        :param traj_scores: Scores belonging to the trajectories (None if absent).
        The remaining parameters are forwarded to the parent initializer or stored as-is.
        """
        # super(DetectionBox, self) starts the method lookup *after* DetectionBox, so the
        # initializer of DetectionBox itself is skipped and the checks below replace it.
        # NOTE(review): presumably this is done to bypass DetectionBox's name/attribute
        # assertions (see the commented-out asserts below) - confirm.
        super(DetectionBox, self).__init__(
            sample_token,
            translation,
            size,
            rotation,
            velocity,
            ego_translation,
            num_pts,
        )
        assert detection_name is not None, "Error: detection_name cannot be empty!"
        # assert detection_name in DETECTION_NAMES, 'Error: Unknown detection_name %s' % detection_name

        # assert attribute_name in ATTRIBUTE_NAMES or attribute_name == '', \
        #     'Error: Unknown attribute_name %s' % attribute_name

        # isinstance also accepts float subclasses, which an exact type comparison rejects.
        assert isinstance(
            detection_score, float
        ), "Error: detection_score must be a float!"
        assert not np.any(
            np.isnan(detection_score)
        ), "Error: detection_score may not be NaN!"

        # Assign.
        self.detection_name = detection_name
        self.attribute_name = attribute_name
        self.detection_score = detection_score
        self.traj = traj
        self.traj_scores = traj_scores
        self.traj_index = None

    def __eq__(self, other):
        """ Compare all serialized fields; trajectories and scores are compared by shape and value. """
        return bool(
            self.sample_token == other.sample_token
            and self.translation == other.translation
            and self.size == other.size
            and self.rotation == other.rotation
            and self.velocity == other.velocity
            and self.ego_translation == other.ego_translation
            and self.num_pts == other.num_pts
            and self.detection_name == other.detection_name
            and self.detection_score == other.detection_score
            and self.attribute_name == other.attribute_name
            # array_equal returns False for differently shaped trajectories instead of
            # broadcasting them or failing inside the element-wise comparison.
            and np.array_equal(self.traj, other.traj)
            and np.array_equal(self.traj_scores, other.traj_scores)
        )

    def serialize(self) -> dict:
        """ Serialize instance into json-friendly format. """
        return {
            "sample_token": self.sample_token,
            "translation": self.translation,
            "size": self.size,
            "rotation": self.rotation,
            "velocity": self.velocity,
            "ego_translation": self.ego_translation,
            "num_pts": self.num_pts,
            "detection_name": self.detection_name,
            "detection_score": self.detection_score,
            "attribute_name": self.attribute_name,
            "traj": self.traj,
            "traj_scores": self.traj_scores,
        }

    @classmethod
    def deserialize(cls, content: dict):
        """
        Initialize from serialized content.
        Trajectories are read from "predict_traj" / "predict_traj_score" when present; otherwise the
        keys written by serialize() ("traj" / "traj_scores") are used, so that the output of
        serialize() can be loaded again.
        :raises KeyError: If a required key, or both spellings of a trajectory key, are missing.
        """
        traj_key = "predict_traj" if "predict_traj" in content else "traj"
        scores_key = (
            "predict_traj_score" if "predict_traj_score" in content else "traj_scores"
        )
        return cls(
            sample_token=content["sample_token"],
            translation=tuple(content["translation"]),
            size=tuple(content["size"]),
            rotation=tuple(content["rotation"]),
            velocity=tuple(content["velocity"]),
            ego_translation=(0.0, 0.0, 0.0)
            if "ego_translation" not in content
            else tuple(content["ego_translation"]),
            num_pts=-1 if "num_pts" not in content else int(content["num_pts"]),
            detection_name=content["detection_name"],
            detection_score=-1.0
            if "detection_score" not in content
            else float(content["detection_score"]),
            attribute_name=content["attribute_name"],
            traj=content[traj_key],
            traj_scores=content[scores_key],
        )


class DetectionMotionBox_modified(DetectionMotionBox):
    """ DetectionMotionBox that additionally stores an annotation token, a visibility and an index. """

    def __init__(self, *args, token=None, visibility=None, index=None, **kwargs):
        """
        add annotation token
        :param token: Annotation token of the box.
        :param visibility: Visibility value stored with the box.
        :param index: Index value stored with the box.
        All other arguments are forwarded unchanged to DetectionMotionBox.
        """
        super().__init__(*args, **kwargs)
        self.token = token
        self.visibility = visibility
        self.index = index

    def serialize(self) -> dict:
        """ Serialize instance into json-friendly format. """
        return {
            "token": self.token,
            "sample_token": self.sample_token,
            "translation": self.translation,
            "size": self.size,
            "rotation": self.rotation,
            "velocity": self.velocity,
            "ego_translation": self.ego_translation,
            "num_pts": self.num_pts,
            "detection_name": self.detection_name,
            "detection_score": self.detection_score,
            "attribute_name": self.attribute_name,
            "visibility": self.visibility,
            "index": self.index,
            "traj": self.traj,
            "traj_scores": self.traj_scores,
        }

    @classmethod
    def deserialize(cls, content: dict):
        """
        Initialize from serialized content.
        "traj_scores" is optional and defaults to None when the key is absent.
        :raises KeyError: If one of the required keys is missing.
        """
        return cls(
            token=content["token"],
            sample_token=content["sample_token"],
            translation=tuple(content["translation"]),
            size=tuple(content["size"]),
            rotation=tuple(content["rotation"]),
            velocity=tuple(content["velocity"]),
            ego_translation=(0.0, 0.0, 0.0)
            if "ego_translation" not in content
            else tuple(content["ego_translation"]),
            num_pts=-1 if "num_pts" not in content else int(content["num_pts"]),
            detection_name=content["detection_name"],
            detection_score=-1.0
            if "detection_score" not in content
            else float(content["detection_score"]),
            attribute_name=content["attribute_name"],
            visibility=content["visibility"],
            index=content["index"],
            traj=content["traj"],
            # serialize() writes this field, so read it back instead of dropping it.
            traj_scores=content.get("traj_scores"),
        )


def load_prediction(
    result_path: str,
    max_boxes_per_sample: int,
    box_cls,
    verbose: bool = False,
    category_convert_type="detection_category",
) -> Tuple[EvalBoxes, Dict]:
    """
    Reads the predicted boxes of a submission from a json file.
    :param result_path: Path to the .json result file provided by the user.
    :param max_boxes_per_sample: Maximum number of boxes allowed per sample.
    :param box_cls: Type of box to load, e.g. DetectionBox, DetectionMotionBox or TrackingBox.
    :param verbose: Whether to print messages to stdout.
    :param category_convert_type: With "motion_category" every detection_name is replaced by the
        result of detection_prediction_category_to_motion_name before deserialization; any other
        value leaves the names untouched.
    :return: The deserialized results and meta data.
    """

    # Read the submission and make sure it has the expected top-level layout.
    with open(result_path) as result_file:
        submission = json.load(result_file)
    assert "results" in submission, (
        "Error: No field `results` in result file. Please note that the result format changed."
        "See https://www.nuscenes.org/object-detection for more information."
    )
    results = submission["results"]

    # Optionally rewrite the class names in place.
    if category_convert_type == "motion_category":
        for sample_results in results.values():
            for result in sample_results:
                result["detection_name"] = detection_prediction_category_to_motion_name(
                    result["detection_name"]
                )

    # Turn the raw dicts into boxes and pick up the meta data.
    all_results = EvalBoxes.deserialize(results, box_cls)
    meta = submission["meta"]
    if verbose:
        message = "Loaded results from {}. Found detections for {} samples.".format(
            result_path, len(all_results.sample_tokens)
        )
        print(message)

    # Enforce the upper bound on predicted boxes per sample.
    for sample_token in all_results.sample_tokens:
        box_count = len(all_results.boxes[sample_token])
        assert box_count <= max_boxes_per_sample, (
            "Error: Only <= %d boxes per sample allowed!" % max_boxes_per_sample
        )

    return all_results, meta


def load_gt(
    nusc: NuScenes,
    eval_split: str,
    box_cls,
    verbose: bool = False,
    category_convert_type="detection_category",
):
    """
    Loads ground truth boxes from DB.
    :param nusc: A NuScenes instance.
    :param eval_split: The evaluation split for which we load GT boxes.
    :param box_cls: Type of box to load. Only DetectionMotionBox_modified is supported.
    :param verbose: Whether to print messages to stdout.
    :param category_convert_type: "detection_category" labels the boxes via
        category_to_detection_name, "motion_category" via category_to_motion_name.
    :return: The GT boxes.
    :raises ValueError: If eval_split is not one of the splits handled below.
    :raises NotImplementedError: If an annotation is encountered while box_cls or
        category_convert_type is unsupported.
    """
    predict_helper = PredictHelper(nusc)
    # Init.
    if box_cls == DetectionMotionBox_modified:
        attribute_map = {a["token"]: a["name"] for a in nusc.attribute}

    if verbose:
        print(
            "Loading annotations for {} split from nuScenes version: {}".format(
                eval_split, nusc.version
            )
        )
    # Read out all sample_tokens in DB.
    sample_tokens_all = [s["token"] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, "Error: Database has no samples!"

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {"train", "val", "train_detect", "train_track"}:
        assert version.endswith(
            "trainval"
        ), "Error: Requested split {} which is not compatible with NuScenes version {}".format(
            eval_split, version
        )
    elif eval_split in {"mini_train", "mini_val"}:
        assert version.endswith(
            "mini"
        ), "Error: Requested split {} which is not compatible with NuScenes version {}".format(
            eval_split, version
        )
    elif eval_split == "test":
        assert version.endswith(
            "test"
        ), "Error: Requested split {} which is not compatible with NuScenes version {}".format(
            eval_split, version
        )
    else:
        raise ValueError(
            "Error: Requested split {} which this function cannot map to the correct NuScenes version.".format(
                eval_split
            )
        )

    if eval_split == "test":
        # Check that you aren't trying to cheat :).
        assert (
            len(nusc.sample_annotation) > 0
        ), "Error: You are trying to evaluate on the test set but you do not have the annotations!"

    # 1-based position of every sample inside its scene, found by following the "next" links.
    index_map = {}
    for scene in nusc.scene:
        first_sample_token = scene["first_sample_token"]
        sample = nusc.get("sample", first_sample_token)
        index_map[first_sample_token] = 1
        index = 2
        while sample["next"] != "":
            sample = nusc.get("sample", sample["next"])
            index_map[sample["token"]] = index
            index += 1

    # Keep the samples whose scene belongs to the requested split.
    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get("sample", sample_token)["scene_token"]
        scene_record = nusc.get("scene", scene_token)
        if scene_record["name"] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):

        sample = nusc.get("sample", sample_token)
        sample_annotation_tokens = sample["anns"]

        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:
            # Only DetectionMotionBox_modified can be built here. The check stays inside the loop
            # so that it only triggers once an annotation actually has to be converted.
            if box_cls != DetectionMotionBox_modified:
                raise NotImplementedError("Error: Invalid box_cls %s!" % box_cls)

            sample_annotation = nusc.get("sample_annotation", sample_annotation_token)

            # Get label name in detection task and filter unused labels.
            if category_convert_type == "detection_category":
                detection_name = category_to_detection_name(
                    sample_annotation["category_name"]
                )
            elif category_convert_type == "motion_category":
                detection_name = category_to_motion_name(
                    sample_annotation["category_name"]
                )
            else:
                raise NotImplementedError
            if detection_name is None:
                continue

            # Get attribute_name.
            attr_tokens = sample_annotation["attribute_tokens"]
            attr_count = len(attr_tokens)
            if attr_count == 0:
                attribute_name = ""
            elif attr_count == 1:
                attribute_name = attribute_map[attr_tokens[0]]
            else:
                raise Exception(
                    "Error: GT annotations must not have more than one attribute!"
                )

            # Future positions of the annotated instance (6 seconds, agent frame).
            instance_token = sample_annotation["instance_token"]
            fut_traj_local = predict_helper.get_future_for_agent(
                instance_token, sample_token, seconds=6, in_agent_frame=True
            )
            # An empty array marks an annotation without any future positions.
            fut_traj_scene_centric = np.zeros((0,))
            if fut_traj_local.shape[0] > 0:
                # NOTE(review): the pose used for the conversion is that of the box returned by
                # get_sample_data for LIDAR_TOP, so the trajectory is presumably expressed in
                # that sensor's frame rather than in world coordinates - confirm.
                _, boxes, _ = nusc.get_sample_data(
                    sample["data"]["LIDAR_TOP"],
                    selected_anntokens=[sample_annotation["token"]],
                )
                box = boxes[0]
                trans = box.center
                rot = Quaternion(matrix=box.rotation_matrix)
                fut_traj_scene_centric = convert_local_coords_to_global(
                    fut_traj_local, trans, rot
                )

            sample_boxes.append(
                box_cls(
                    token=sample_annotation_token,
                    sample_token=sample_token,
                    translation=sample_annotation["translation"],
                    size=sample_annotation["size"],
                    rotation=sample_annotation["rotation"],
                    velocity=nusc.box_velocity(sample_annotation["token"])[:2],
                    num_pts=sample_annotation["num_lidar_pts"]
                    + sample_annotation["num_radar_pts"],
                    detection_name=detection_name,
                    detection_score=-1.0,  # GT samples do not have a score.
                    attribute_name=attribute_name,
                    visibility=sample_annotation["visibility_token"],
                    index=index_map[sample_token],
                    traj=fut_traj_scene_centric,
                )
            )

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print(
            "Loaded ground truth annotations for {} samples.".format(
                len(all_annotations.sample_tokens)
            )
        )

    return all_annotations


def prediction_metrics(gt_box_match, pred_box):
    """
    Compute the trajectory errors of a prediction against its matched ground truth box.
    :param gt_box_match: Ground truth box; its traj array is reshaped to rows of two values.
    :param pred_box: Predicted box; its traj is converted to an array whose second axis is
        treated as the time axis (presumably laid out as modes x steps x 2 - confirm).
    :return: (min_ade, min_fde, miss_rate). When the ground truth has no steps the result is
        (np.array([0]), np.array([0]), 0); otherwise the values come from min_ade, min_fde and
        miss_rate (distance threshold 2) converted to numpy.
    """
    predicted = np.array(pred_box.traj)
    gt_steps = gt_box_match.traj.reshape((-1, 2))
    num_valid = gt_steps.shape[0]
    if num_valid <= 0:
        # Nothing to compare against.
        return np.array([0]), np.array([0]), 0

    # Pad the ground truth with zeros up to the predicted number of steps and remember which
    # steps are real.
    horizon = predicted.shape[1]
    padded_gt = np.zeros((horizon, 2))
    padded_gt[:num_valid, :] = gt_steps
    step_is_valid = np.zeros((horizon,))
    step_is_valid[:num_valid] = 1

    # The metric helpers receive tensors with an additional leading axis of size one.
    pred_batch = torch.tensor(predicted[None])
    gt_batch = torch.tensor(padded_gt[None])
    valid_batch = torch.tensor(step_is_valid[None])

    # The helpers are given the inverted mask (1 marks a padded step); the second return value
    # of min_ade / min_fde is not needed.
    ade_err, _ = min_ade(pred_batch, gt_batch, 1 - valid_batch)
    fde_err, _ = min_fde(pred_batch, gt_batch, 1 - valid_batch)
    mr_err = miss_rate(pred_batch, gt_batch, 1 - valid_batch, dist_thresh=2)
    return ade_err.numpy(), fde_err.numpy(), mr_err.numpy()


def accumulate(
    gt_boxes: EvalBoxes,
    pred_boxes: EvalBoxes,
    class_name: str,
    dist_fcn: Callable,
    dist_th: float,
    verbose: bool = False,
) -> Tuple[DetectionMotionMetricData, int, int, int]:
    """
    Average Precision over predefined different recall thresholds for a single distance threshold.
    The recall/conf thresholds and other raw metrics will be used in secondary metrics.
    Predictions of class_name are greedily matched, in order of decreasing detection score, to the
    closest not yet taken ground truth box of the same class in the same sample.
    :param gt_boxes: Maps every sample_token to a list of its sample_annotations.
    :param pred_boxes: Maps every sample_token to a list of its sample_results.
    :param class_name: Class to compute AP on.
    :param dist_fcn: Distance function used to match detections and ground truths.
    :param dist_th: Distance threshold for a match.
    :param verbose: If true, print debug messages.
    :return: (metric_data, N_tp, N_fp, npos). The interpolated metric data, the number of true
        positives, the number of false positives and the number of ground truth boxes of the class.
        If the class has no ground truth boxes or nothing was matched, the tuple is
        (DetectionMotionMetricData.no_predictions(), 0, 0, 0).
    """
    # ---------------------------------------------
    # Organize input and initialize accumulators.
    # ---------------------------------------------

    # Count the positives.
    npos = len([1 for gt_box in gt_boxes.all if gt_box.detection_name == class_name])
    if verbose:
        print(
            "Found {} GT of class {} out of {} total across {} samples.".format(
                npos, class_name, len(gt_boxes.all), len(gt_boxes.sample_tokens)
            )
        )

    # For missing classes in the GT, return a data structure corresponding to no predictions.
    if npos == 0:
        return DetectionMotionMetricData.no_predictions(), 0, 0, 0

    # Organize the predictions in a single list.
    pred_boxes_list = [
        box for box in pred_boxes.all if box.detection_name == class_name
    ]
    pred_confs = [box.detection_score for box in pred_boxes_list]

    if verbose:
        print(
            "Found {} PRED of class {} out of {} total across {} samples.".format(
                len(pred_confs),
                class_name,
                len(pred_boxes.all),
                len(pred_boxes.sample_tokens),
            )
        )

    # Sort by confidence: (score, index) pairs are sorted ascending and then reversed, which
    # gives descending scores with ties ordered by descending index.
    sortind = [i for (v, i) in sorted((v, i) for (i, v) in enumerate(pred_confs))][::-1]

    # Do the actual matching.
    tp = []  # Accumulator of true positives.
    fp = []  # Accumulator of false positives.
    conf = []  # Accumulator of confidences.

    # match_data holds the extra metrics we calculate for each match.
    match_data = {
        "trans_err": [],
        "vel_err": [],
        "scale_err": [],
        "orient_err": [],
        "attr_err": [],
        "conf": [],
        "min_ade_err": [],
        "min_fde_err": [],
        "miss_rate_err": [],
    }

    # ---------------------------------------------
    # Match and accumulate match data.
    # ---------------------------------------------

    taken = set()  # Initially no gt bounding box is matched.
    for ind in sortind:
        pred_box = pred_boxes_list[ind]
        min_dist = np.inf
        match_gt_idx = None

        for gt_idx, gt_box in enumerate(gt_boxes[pred_box.sample_token]):

            # Find closest match among ground truth boxes
            # (same class, same sample, and not already claimed by a higher-scoring prediction).
            if (
                gt_box.detection_name == class_name
                and not (pred_box.sample_token, gt_idx) in taken
            ):
                this_distance = dist_fcn(gt_box, pred_box)
                if this_distance < min_dist:
                    min_dist = this_distance
                    match_gt_idx = gt_idx

        # If the closest match is close enough according to threshold we have a match!
        is_match = min_dist < dist_th

        if is_match:
            taken.add((pred_box.sample_token, match_gt_idx))

            #  Update tp, fp and confs.
            tp.append(1)
            fp.append(0)
            conf.append(pred_box.detection_score)

            # Since it is a match, update match data also.
            gt_box_match = gt_boxes[pred_box.sample_token][match_gt_idx]

            match_data["trans_err"].append(center_distance(gt_box_match, pred_box))
            match_data["vel_err"].append(velocity_l2(gt_box_match, pred_box))
            match_data["scale_err"].append(1 - scale_iou(gt_box_match, pred_box))

            # Barrier orientation is only determined up to 180 degree. (For cones orientation is discarded later)
            period = np.pi if class_name == "barrier" else 2 * np.pi
            match_data["orient_err"].append(
                yaw_diff(gt_box_match, pred_box, period=period)
            )

            match_data["attr_err"].append(1 - attr_acc(gt_box_match, pred_box))
            # Trajectory errors of the matched pair.
            minade, minfde, m_r = prediction_metrics(gt_box_match, pred_box)

            match_data["min_ade_err"].append(minade)
            match_data["min_fde_err"].append(minfde)
            match_data["miss_rate_err"].append(m_r)
            match_data["conf"].append(pred_box.detection_score)

        else:
            # No match. Mark this as a false positive.
            tp.append(0)
            fp.append(1)
            conf.append(pred_box.detection_score)

    # Check if we have any matches. If not, just return a "no predictions" array.
    # Note that all three counts are reported as 0 here, including npos.
    if len(match_data["trans_err"]) == 0:
        return DetectionMotionMetricData.no_predictions(), 0, 0, 0

    # ---------------------------------------------
    # Calculate and interpolate precision and recall
    # ---------------------------------------------

    # Accumulate. The totals are taken before tp and fp are replaced by their running sums.
    N_tp = np.sum(tp)
    N_fp = np.sum(fp)
    tp = np.cumsum(tp).astype(float)
    fp = np.cumsum(fp).astype(float)
    conf = np.array(conf)

    # Calculate precision and recall.
    prec = tp / (fp + tp)
    rec = tp / float(npos)

    rec_interp = np.linspace(
        0, 1, DetectionMotionMetricData.nelem
    )  # 101 steps, from 0% to 100% recall.
    # Recall levels beyond the highest achieved recall get precision and confidence 0.
    prec = np.interp(rec_interp, rec, prec, right=0)
    conf = np.interp(rec_interp, rec, conf, right=0)
    rec = rec_interp

    # ---------------------------------------------
    # Re-sample the match-data to match, prec, recall and conf.
    # ---------------------------------------------

    for key in match_data.keys():
        if key == "conf":
            continue  # Confidence is used as reference to align with fp and tp. So skip in this step.

        else:
            # For each match_data, we first calculate the accumulated mean.
            tmp = cummean(np.array(match_data[key]))

            # Then interpolate based on the confidences. (Note reversing since np.interp needs increasing arrays)
            match_data[key] = np.interp(
                conf[::-1], match_data["conf"][::-1], tmp[::-1]
            )[::-1]

    # ---------------------------------------------
    # Done. Instantiate MetricData and return
    # ---------------------------------------------
    return (
        DetectionMotionMetricData(
            recall=rec,
            precision=prec,
            confidence=conf,
            trans_err=match_data["trans_err"],
            vel_err=match_data["vel_err"],
            scale_err=match_data["scale_err"],
            orient_err=match_data["orient_err"],
            attr_err=match_data["attr_err"],
            min_ade_err=match_data["min_ade_err"],
            min_fde_err=match_data["min_fde_err"],
            miss_rate_err=match_data["miss_rate_err"],
        ),
        N_tp,
        N_fp,
        npos,
    )


def accumulate_motion(
    gt_boxes: EvalBoxes,
    pred_boxes: EvalBoxes,
    class_name: str,
    dist_fcn: Callable,
    traj_fcn: Callable,
    dist_th: float,
    traj_dist_th: float,
    verbose: bool = False,
    final_step: float = 12,
) -> Tuple[DetectionMotionMetricData, int, int, int]:
    """
    Accumulate precision/recall and per-match error metrics for one class, where a prediction only
    counts as a true positive if it passes BOTH the box distance threshold and the trajectory
    distance threshold.
    The recall/conf thresholds and other raw metrics will be used in secondary metrics.
    :param gt_boxes: Maps every sample_token to a list of its sample_annotations.
    :param pred_boxes: Maps every sample_token to a list of its sample_results.
    :param class_name: Class to compute the metrics on.
    :param dist_fcn: Distance function used to match detections and ground truths.
        Called as dist_fcn(gt_box, pred_box).
    :param traj_fcn: Trajectory distance function, called as traj_fcn(gt_box, pred_box, final_step)
        on the closest ground truth box; its result is compared against traj_dist_th.
    :param dist_th: Distance threshold for a match (strict: distance < dist_th).
    :param traj_dist_th: Trajectory distance threshold for a match (strict: distance < traj_dist_th).
    :param verbose: If true, print debug messages.
    :param final_step: Forwarded unchanged to traj_fcn (presumably the trajectory step at which the
        displacement is measured - confirm against the traj_fcn implementation).
    :return: (metric_data, N_tp, N_fp, npos). The interpolated metric data, the number of true
        positives, the number of false positives and the number of ground truth boxes of this class.
        If the class has no ground truth boxes, or no prediction was matched, the result is
        (DetectionMotionMetricData.no_predictions(), 0, 0, 0).
    """
    # ---------------------------------------------
    # Organize input and initialize accumulators.
    # ---------------------------------------------

    # Count the positives.
    npos = sum(1 for gt_box in gt_boxes.all if gt_box.detection_name == class_name)
    if verbose:
        print(
            "Found {} GT of class {} out of {} total across {} samples.".format(
                npos, class_name, len(gt_boxes.all), len(gt_boxes.sample_tokens)
            )
        )

    # For missing classes in the GT, return a data structure corresponding to no predictions.
    if npos == 0:
        return DetectionMotionMetricData.no_predictions(), 0, 0, 0

    # Organize the predictions of this class in a single list.
    pred_boxes_list = [
        box for box in pred_boxes.all if box.detection_name == class_name
    ]
    pred_confs = [box.detection_score for box in pred_boxes_list]

    if verbose:
        print(
            "Found {} PRED of class {} out of {} total across {} samples.".format(
                len(pred_confs),
                class_name,
                len(pred_boxes.all),
                len(pred_boxes.sample_tokens),
            )
        )

    # Sort by confidence, highest first. Ties are broken by the higher list index first, which
    # keeps the processing order (and therefore the greedy matching) deterministic.
    sortind = [
        idx for (_, idx) in sorted((score, idx) for (idx, score) in enumerate(pred_confs))
    ][::-1]

    # Do the actual matching.
    tp = []  # Accumulator of true positives.
    fp = []  # Accumulator of false positives.
    conf = []  # Accumulator of confidences.

    # match_data holds the extra metrics we calculate for each match.
    match_data = {
        "trans_err": [],
        "vel_err": [],
        "scale_err": [],
        "orient_err": [],
        "attr_err": [],
        "conf": [],
        "min_ade_err": [],
        "min_fde_err": [],
        "miss_rate_err": [],
    }

    # Barrier orientation is only determined up to 180 degree. (For cones orientation is discarded later)
    period = np.pi if class_name == "barrier" else 2 * np.pi

    # ---------------------------------------------
    # Match and accumulate match data.
    # ---------------------------------------------

    taken = set()  # Initially no gt bounding box is matched.
    for ind in sortind:
        pred_box = pred_boxes_list[ind]
        sample_token = pred_box.sample_token
        sample_gt_boxes = gt_boxes[sample_token]
        min_dist = np.inf
        match_gt_idx = None

        # Find the closest not-yet-taken ground truth box of this class in the same sample.
        for gt_idx, gt_box in enumerate(sample_gt_boxes):
            if gt_box.detection_name != class_name or (sample_token, gt_idx) in taken:
                continue
            this_distance = dist_fcn(gt_box, pred_box)
            if this_distance < min_dist:
                min_dist = this_distance
                match_gt_idx = gt_idx

        # If the closest match is close enough according to both thresholds we have a match!
        # The trajectory distance is only evaluated for the closest ground truth box, and only
        # once the box distance test passed (min_dist < dist_th implies a candidate was found).
        is_match = False
        if min_dist < dist_th:
            gt_box_match = sample_gt_boxes[match_gt_idx]
            fde_distance = traj_fcn(gt_box_match, pred_box, final_step)
            is_match = fde_distance < traj_dist_th

        # The confidence is recorded for every prediction, matched or not.
        conf.append(pred_box.detection_score)

        if not is_match:
            # No match. Mark this as a false positive.
            tp.append(0)
            fp.append(1)
            continue

        taken.add((sample_token, match_gt_idx))

        # Update tp and fp.
        tp.append(1)
        fp.append(0)

        # Since it is a match, update match data also.
        match_data["trans_err"].append(center_distance(gt_box_match, pred_box))
        match_data["vel_err"].append(velocity_l2(gt_box_match, pred_box))
        match_data["scale_err"].append(1 - scale_iou(gt_box_match, pred_box))
        match_data["orient_err"].append(
            yaw_diff(gt_box_match, pred_box, period=period)
        )
        match_data["attr_err"].append(1 - attr_acc(gt_box_match, pred_box))

        minade, minfde, m_r = prediction_metrics(gt_box_match, pred_box)
        match_data["min_ade_err"].append(minade)
        match_data["min_fde_err"].append(minfde)
        match_data["miss_rate_err"].append(m_r)
        match_data["conf"].append(pred_box.detection_score)

    # Check if we have any matches. If not, just return a "no predictions" array.
    if not match_data["trans_err"]:
        return DetectionMotionMetricData.no_predictions(), 0, 0, 0

    # ---------------------------------------------
    # Calculate and interpolate precision and recall
    # ---------------------------------------------

    # Accumulate. The totals are taken before tp/fp are replaced by their running sums.
    N_tp = np.sum(tp)
    N_fp = np.sum(fp)
    tp = np.cumsum(tp).astype(float)
    fp = np.cumsum(fp).astype(float)
    conf = np.array(conf)

    # Calculate precision and recall.
    prec = tp / (fp + tp)
    rec = tp / float(npos)

    # Resample onto DetectionMotionMetricData.nelem evenly spaced recall values in [0, 1].
    # Recall levels beyond the highest achieved recall get precision and confidence 0.
    rec_interp = np.linspace(0, 1, DetectionMotionMetricData.nelem)
    prec = np.interp(rec_interp, rec, prec, right=0)
    conf = np.interp(rec_interp, rec, conf, right=0)
    rec = rec_interp

    # ---------------------------------------------
    # Re-sample the match-data to match, prec, recall and conf.
    # ---------------------------------------------

    for key in match_data.keys():
        if key == "conf":
            continue  # Confidence is used as reference to align with fp and tp. So skip in this step.

        # For each match_data, we first calculate the accumulated mean.
        tmp = cummean(np.array(match_data[key]))

        # Then interpolate based on the confidences. (Note reversing since np.interp needs increasing arrays)
        match_data[key] = np.interp(
            conf[::-1], match_data["conf"][::-1], tmp[::-1]
        )[::-1]

    # ---------------------------------------------
    # Done. Instantiate MetricData and return
    # ---------------------------------------------
    return (
        DetectionMotionMetricData(
            recall=rec,
            precision=prec,
            confidence=conf,
            trans_err=match_data["trans_err"],
            vel_err=match_data["vel_err"],
            scale_err=match_data["scale_err"],
            orient_err=match_data["orient_err"],
            attr_err=match_data["attr_err"],
            min_ade_err=match_data["min_ade_err"],
            min_fde_err=match_data["min_fde_err"],
            miss_rate_err=match_data["miss_rate_err"],
        ),
        N_tp,
        N_fp,
        npos,
    )