Spaces:

SEA-AI
/

box-metrics

Sleeping

App Files Files Community

gil.simas@sea.ai committed on Apr 23, 2024

Commit

930daa4

1 Parent(s): 6b2433c

metric works, not fully integrated

Browse files

Files changed (6) hide show

__pycache__/box_metrics.cpython-39.pyc +0 -0
__pycache__/utils.cpython-39.pyc +0 -0
box_metrics.py +229 -0
compute.py +88 -0
test.py +26 -0
utils.py +173 -0

__pycache__/box_metrics.cpython-39.pyc ADDED Viewed

Binary file (5.9 kB). View file

__pycache__/utils.cpython-39.pyc ADDED Viewed

Binary file (4.85 kB). View file

box_metrics.py ADDED Viewed

	@@ -0,0 +1,229 @@

+import evaluate
+import datasets
+import motmetrics as mm
+import numpy as np
+from seametrics.payload import Payload
+import torch
+from utils import bbox_iou, bbox_bep
+import datasets
+# _DESCRIPTION = """\
+# The box-metrics package provides a set of metrics to evaluate
+# the performance of object detection algorithms in terms of sizing and positioning
+# of the bounding boxes."""
+# _KWARGS_DESCRIPTION = """
+# Calculates how good are predictions given some references, using certain scores
+# Args:
+#     predictions: list of predictions to score. Each predictions
+#         should be a string with tokens separated by spaces.
+#     references: list of reference for each prediction. Each
+#         reference should be a string with tokens separated by spaces.
+#     max_iou (`float`, *optional*):
+#         If specified, this is the minimum Intersection over Union (IoU) threshold to consider a detection as a true positive.
+#         Default is 0.5.
+# """
+# _CITATION = """\
+# @InProceedings{huggingface:module,
+# title = {A great new module},
+# authors={huggingface, Inc.},
+# year={2020}
+# }\
+# @article{milan2016mot16,
+#   title={Are object detection assessment criteria ready for maritime computer vision?},
+#   author={Dilip K. Prasad1, Deepu Rajan and Chai Quek},
+#   journal={arXiv:1809.04659v1},
+#   year={2018}
+# }
+# """
+# BibTeX text handed to `evaluate.MetricInfo(citation=...)` in `BoxMetrics._info`.
+# NOTE(review): the backslash after the first entry's closing brace is a line
+# continuation inside the string literal, so the two entries end up joined on
+# one line ("}@article{...") - confirm that is intended.
+# NOTE(review): both entries (a placeholder module and the MOT16 tracking
+# benchmark) look like leftovers from another module; the commented-out
+# citation above refers to the maritime detection-assessment paper instead.
+_CITATION = """\
+@InProceedings{huggingface:module,
+title = {A great new module},
+authors={huggingface, Inc.},
+year={2020}
+}\
+@article{milan2016mot16,
+  title={MOT16: A benchmark for multi-object tracking},
+  author={Milan, Anton and Leal-Taix{\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
+  journal={arXiv preprint arXiv:1603.00831},
+  year={2016}
+}
+"""
+# Module description handed to `evaluate.MetricInfo(description=...)`.
+# NOTE(review): this text describes a multi-object-tracking module, whereas the
+# class below computes IoU, bottom-edge proximity and box position/size
+# differences; the commented-out box-metrics description above looks like the
+# intended wording - confirm before publishing.
+_DESCRIPTION = """\
+The MOT Metrics module is designed to evaluate multi-object tracking (MOT)
+algorithms by computing various metrics based on predicted and ground truth bounding
+boxes. It serves as a crucial tool in assessing the performance of MOT systems,
+aiding in the iterative improvement of tracking algorithms."""
+# Argument description handed to `evaluate.MetricInfo(inputs_description=...)`.
+# NOTE(review): the text talks about space-separated token strings, but `_info`
+# declares nested float sequences, and `BoxMetrics.__init__` defaults `max_iou`
+# to 0.01 rather than the 0.5 stated here.
+_KWARGS_DESCRIPTION = """
+Calculates how good are predictions given some references, using certain scores
+Args:
+    predictions: list of predictions to score. Each predictions
+        should be a string with tokens separated by spaces.
+    references: list of reference for each prediction. Each
+        reference should be a string with tokens separated by spaces.
+    max_iou (`float`, *optional*):
+        If specified, this is the minimum Intersection over Union (IoU) threshold to consider a detection as a true positive.
+        Default is 0.5.
+"""
+# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class BoxMetrics(evaluate.Metric):
    """Bounding-box sizing/positioning statistics for detections vs. ground truth.

    Boxes are collected per sequence with :meth:`add` / :meth:`add_payload` and
    evaluated with :meth:`compute`. For every ground-truth box, the predictions
    of the same frame whose IoU exceeds ``max_iou`` are treated as matches; IoU,
    bottom-edge proximity (BEP) and absolute position/size differences are
    accumulated over those matches.

    Args:
        max_iou: Despite its name, the *minimum* IoU a prediction must exceed
            to be matched with a ground-truth box.
        **kwargs: Accepted for interface compatibility; currently unused.
    """

    def __init__(self, max_iou: float = 0.01, **kwargs):
        # The base-class constructor call is left disabled, exactly as in the
        # original code. NOTE(review): confirm whether evaluate.Metric needs it.
        # super().__init__(**kwargs)
        self.max_iou = max_iou
        # {sequence name: {field name: list of per-frame box tensors}}
        self.boxes = {}
        self.gt_field = "ground_truth_det"

    def _info(self):
        # TODO: Specifies the evaluate.EvaluationModuleInfo object
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                "predictions": datasets.Sequence(
                    datasets.Sequence(datasets.Value("float"))
                ),
                "references": datasets.Sequence(
                    datasets.Sequence(datasets.Value("float"))
                ),
            }),
            # Additional links to the codebase or references
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"],
        )

    def add_payload(self, payload: Payload):
        """Store the boxes of ``payload``; thin alias of :meth:`add`."""
        self.add(payload)

    def add(self, payload: Payload):
        """Convert and store ground-truth and prediction boxes of a payload.

        The ground-truth field name is taken from ``payload.gt_field_name``.
        Any boxes stored earlier for a sequence of the same name are replaced.
        """
        self.gt_field = payload.gt_field_name
        for sequence in payload.sequences:
            sequence_data = payload.sequences[sequence]
            resolution = sequence_data["resolution"]
            stored = {
                self.gt_field: self.payload_labels_to_tm(
                    sequence_data[self.gt_field], resolution
                )
            }
            for model in payload.models:
                stored[model] = self.payload_preds_to_rm(
                    sequence_data[model], resolution
                )
            self.boxes[sequence] = stored

    def compute(self):
        """Compute the statistics for every stored sequence and model.

        Returns:
            ``{sequence: {model: stats}}`` where ``stats`` holds ``iou_mean``,
            ``bep_mean`` and the mean/std of the absolute differences in
            bottom-edge centre x, bottom-edge y, width and height (in the
            pixel units produced by the converters). Statistics are NaN when
            a model has no matched boxes.

        Raises:
            ValueError: If a matched box has non-positive IoU or BEP.
        """
        output = {}
        for sequence, fields in self.boxes.items():
            target = fields[self.gt_field]
            # One independent result per model. The ground-truth entry lives in
            # the same dict and must not be evaluated as if it were a model.
            output[sequence] = {
                model: self._summarise(target, preds)
                for model, preds in fields.items()
                if model != self.gt_field
            }
        return output

    def _summarise(self, target, preds):
        """Return the statistics dict for one model of one sequence."""
        ious, beps = [], []
        bottom_x, bottom_y, widths, heights = [], [], [], []
        for target_frame, pred_frame in zip(target, preds):
            target_boxes = target_frame[:, 1:]  # drop the leading label column
            pred_boxes = pred_frame[:, :4]  # drop trailing conf and label
            if target_boxes.shape[0] == 0 or pred_boxes.shape[0] == 0:
                continue
            for t_box in target_boxes:
                iou = bbox_iou(t_box.unsqueeze(0), pred_boxes, xywh=False).squeeze(1)
                bep = bbox_bep(t_box.unsqueeze(0), pred_boxes, xywh=False).squeeze(1)
                matched = iou > self.max_iou
                iou, bep = iou[matched], bep[matched]
                if torch.any(iou <= 0):
                    raise ValueError("IoU should be greater than 0, pls contact code maintainer")
                if torch.any(bep <= 0):
                    raise ValueError("BEP should be greater than 0, pls contact code maintainer")
                ious.extend(iou.tolist())
                beps.extend(bep.tolist())

                tx1, ty1, tx2, ty2 = t_box.tolist()
                for px1, py1, px2, py2 in pred_boxes[matched].tolist():
                    bottom_x.append(abs((px1 + px2) / 2 - (tx1 + tx2) / 2))
                    # y2 is the bottom edge of an (x1, y1, x2, y2) box.
                    bottom_y.append(abs(py2 - ty2))
                    widths.append(abs((px2 - px1) - (tx2 - tx1)))
                    heights.append(abs((py2 - py1) - (ty2 - ty1)))

        iou_mean, _ = self._mean_std(ious)
        bep_mean, _ = self._mean_std(beps)
        bottom_x_mean, bottom_x_std = self._mean_std(bottom_x)
        bottom_y_mean, bottom_y_std = self._mean_std(bottom_y)
        width_mean, width_std = self._mean_std(widths)
        height_mean, height_std = self._mean_std(heights)
        return {
            "iou_mean": iou_mean,
            "bep_mean": bep_mean,
            "bottom_x_mean": bottom_x_mean,
            "bottom_y_mean": bottom_y_mean,
            "width_mean": width_mean,
            "height_mean": height_mean,
            "bottom_x_std": bottom_x_std,
            "bottom_y_std": bottom_y_std,
            "width_std": width_std,
            "height_std": height_std,
        }

    @staticmethod
    def _mean_std(values):
        """Return ``(mean, std)`` of ``values``, or NaNs for an empty list."""
        if not values:
            # Same NaN result numpy would give, without its RuntimeWarning.
            return float("nan"), float("nan")
        return np.mean(values), np.std(values)

    @staticmethod
    def _to_pixel_xyxy(box, resolution):
        """Scale an ``[x, y, w, h]`` box by the resolution and return corners."""
        x1, y1 = box[0], box[1]
        x2, y2 = box[0] + box[2], box[1] + box[3]
        return (
            x1 * resolution.width,
            y1 * resolution.height,
            x2 * resolution.width,
            y2 * resolution.height,
        )

    @staticmethod
    def payload_labels_to_tm(labels, resolution):
        """Convert ground-truth detections to one ``(N, 5)`` tensor per frame.

        Each row is ``[label, x1, y1, x2, y2]``; the label is always 0.
        Frames without detections yield an empty ``(0, 5)`` tensor.
        """
        target_tm = []
        for frame in labels:
            rows = [
                [0, *BoxMetrics._to_pixel_xyxy(det["bounding_box"], resolution)]
                for det in frame
            ]
            target_tm.append(
                torch.tensor(rows, dtype=torch.float32) if rows else torch.empty((0, 5))
            )
        return target_tm

    @staticmethod
    def payload_preds_to_rm(preds, resolution):
        """Convert predicted detections to one ``(N, 6)`` tensor per frame.

        Each row is ``[x1, y1, x2, y2, conf, label]``; confidence is always 1
        and the label always 0. Frames without detections yield an empty
        ``(0, 6)`` tensor.
        """
        preds_tm = []
        for frame in preds:
            rows = [
                [*BoxMetrics._to_pixel_xyxy(det["bounding_box"], resolution), 1, 0]
                for det in frame
            ]
            preds_tm.append(
                torch.tensor(rows, dtype=torch.float32) if rows else torch.empty((0, 6))
            )
        return preds_tm

compute.py ADDED Viewed

	@@ -0,0 +1,88 @@

+# Script: loads a FiftyOne dataset view, converts each sequence's ground-truth
+# and predicted detections to per-frame tensors, feeds them frame by frame to
+# `cm.process_batch`, then prints totals and per-class detection rates read
+# from `cm.matrix`.
+import torch
+import numpy as np
+# NOTE(review): utils.py as added in this commit defines only `bbox_bep`,
+# `bbox_iou` and `BoxMetrics`; `concat_labels` / `concat_preds` are not defined
+# there, so this import is expected to fail - confirm which module was meant.
+from utils import BoxMetrics, concat_labels, concat_preds
+import fiftyone as fo
+from seametrics.fo_utils.utils import fo_to_payload
+from const import INDEX_MAPPING, CLASS_MAPPING, INDEX_MAPPING_INV
+from tqdm import tqdm
+tags = ["WHALES"]
+cameras = ["thermal_narrow"]
+dataset_name = "SENTRY_VIDEOS_DATASET_QA"
+#dataset_name = "SENTRY_VIDEOS_DATASET_QA"
+model = "cerulean-level-17_11_2023_RL_SPLIT_ep147_CNN"
+det_gt_field = "ground_truth_det"
+# NOTE(review): `BoxMetrics.__init__` in utils.py of this commit takes no
+# arguments and the class has no `process_batch` method or `matrix` attribute;
+# this script appears to target an earlier confusion-matrix class.
+cm = BoxMetrics(nc=10, conf=0, iou_thres=0)
+# `cameras` is overwritten per dataset; `dataset_view` and `sequences` are only
+# bound when `dataset_name` equals one of the two names handled below.
+if dataset_name == "SAILING_DATASET_QA":
+    cameras = ["thermal_left"]
+    dataset_view = fo.load_dataset(dataset_name).match_tags(tags).select_group_slices(cameras).filter_labels(f"{model}", True, only_matches=False)
+    sequences = dataset_view.distinct("sequence")
+if dataset_name == "SENTRY_VIDEOS_DATASET_QA":
+    cameras = ["thermal_wide"]
+    dataset_view = fo.load_dataset(dataset_name).match_tags(tags).select_group_slices(cameras).filter_labels(f"frames.{model}", True, only_matches=False)
+    sequences = dataset_view.distinct("sequence")
+for sequence in tqdm(sequences):
+    # One payload per sequence, restricted to the single configured model.
+    payload = fo_to_payload(dataset = dataset_name,
+                            gt_field = det_gt_field,
+                            models = [model],
+                            tracking_mode = True,
+                            sequence_list = [sequence],
+                            excluded_classes = ["BIRD"],)
+    # NOTE(review): the payload is subscripted like a dict here, while
+    # box_metrics.py reads `payload.sequences` as an attribute - confirm which
+    # access style `fo_to_payload` results support.
+    target = payload["sequences"][sequence][det_gt_field]
+    preds = payload["sequences"][sequence][model]
+    resolution = payload["sequences"][sequence]["resolution"]
+    target_tm = []
+    preds_tm = []
+    for frame in target:
+        target_tm_batch = []
+        for det in frame:
+            # Ground-truth labels whose CLASS_MAPPING entry is None are skipped.
+            if CLASS_MAPPING[det["label"]] is not None:
+                label = INDEX_MAPPING[CLASS_MAPPING[det["label"]]]-1
+            else:
+                continue
+            box = det["bounding_box"]
+            # [x, y, w, h] -> corner form, then scaled: x by resolution[1] and
+            # y by resolution[0] (presumably (height, width) order - confirm).
+            x1, y1, x2, y2 = box[0], box[1], box[0]+box[2], box[1]+box[3]
+            x1, y1, x2, y2 = x1*resolution[1], y1*resolution[0], x2*resolution[1], y2*resolution[0]
+            target_tm_batch.append([label, x1, y1, x2, y2])
+        target_tm.append(torch.tensor(target_tm_batch) if len(target_tm_batch) > 0 else torch.empty((0, 5)))
+    for frame in preds:
+        pred_tm_batch = []
+        for det in frame:
+            # Prediction labels are looked up in INDEX_MAPPING directly, without
+            # the CLASS_MAPPING step used for the ground truth above.
+            label = INDEX_MAPPING[det["label"]]-1
+            box = det["bounding_box"]
+            x1, y1, x2, y2 = box[0], box[1], box[0]+box[2], box[1]+box[3]
+            x1, y1, x2, y2 = x1*resolution[1], y1*resolution[0], x2*resolution[1], y2*resolution[0]
+            # Confidence is fixed to 1 for every prediction.
+            conf = 1
+            pred_tm_batch.append([x1, y1, x2, y2, conf, label])
+        preds_tm.append(torch.tensor(pred_tm_batch) if len(pred_tm_batch) > 0 else torch.empty((0, 6)))
+    # Frames are paired by index; `preds_tm` must be at least as long as `target_tm`.
+    for i in range(len(target_tm)):
+        target_batch = target_tm[i]
+        pred_batch = preds_tm[i]
+        cm.process_batch(pred_batch, target_batch)
+print("SUMMARY: ")
+print("\nmodel: ", model)
+print("\nconfusion matrix: ")
+print(cm.matrix.astype(int))
+# The last row/column of the matrix is excluded from TP and used for FN/FP
+# (presumably the background class - confirm against the matrix layout).
+tp = cm.matrix[:-1, :-1].sum()
+fp = cm.matrix[:-1, -1].sum()
+fn = cm.matrix[-1, :-1].sum()
+print("\nTP: ", tp, "FP: ", fp, "FN: ", fn, "support: ", tp + fn)
+#Detection Rates:
+print("\nDetection Rates:")
+# The class count 10 mirrors the `nc=10` passed to the constructor above.
+for i in range(10):
+    tp = cm.matrix[:-1, i].sum()
+    fn = cm.matrix[-1, i].sum()
+    if tp + fn == 0:
+        print(f"{INDEX_MAPPING_INV[i+1]}: NaN")
+    else:
+        print(f"{INDEX_MAPPING_INV[i+1]}: {tp/(tp+fn)}")

test.py ADDED Viewed

	@@ -0,0 +1,26 @@

+# Script: builds a payload for every sequence of a tagged FiftyOne dataset view,
+# feeds it to `box_metrics.BoxMetrics` and prints the payload and the result.
+import torch
+import numpy as np
+import fiftyone as fo
+from box_metrics import BoxMetrics
+from seametrics.fo_utils.utils import fo_to_payload
+from tqdm import tqdm
+tags = ["WHALES"]
+dataset_name = "SENTRY_VIDEOS_DATASET_QA"
+model = "cerulean-level-17_11_2023_RL_SPLIT_ep147_CNN"
+det_gt_field = "ground_truth_det"
+# NOTE(review): `dataset` is not used afterwards; the next line loads the
+# dataset again instead of reusing it.
+dataset = fo.load_dataset(dataset_name)
+dataset_view = fo.load_dataset(dataset_name).match_tags(tags) if tags else fo.load_dataset(dataset_name)
+sequences = dataset_view.distinct("sequence")
+bbox_metric = BoxMetrics(max_iou=0.01)
+# A single payload covering all sequences of the view and one model.
+payload = fo_to_payload(dataset = dataset_name,
+                        gt_field = det_gt_field,
+                        models = [model],
+                        tracking_mode = True,
+                        sequence_list = sequences)
+print(payload)
+bbox_metric.add_payload(payload)
+result = bbox_metric.compute()
+print(result)

utils.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import torch
+import numpy as np
+import math
def bbox_bep(box1, box2, xywh=True, eps=1e-7, bep1 = True):
    """
    Calculates bottom edge proximity (BEP) between two sets of boxes.

    Input shapes are box1(1,4) to box2(n,4); the result has shape (n,1).
    Implements BEP1 (default) and BEP2 from
        Are object detection assessment criteria ready for maritime computer vision?

    With ``xb`` the horizontal overlap of the two bottom edges and ``ybe`` the
    vertical distance between them:

        BEP1 = xb / (w1 + w2 - xb) * (1 - ybe / max(h1, h2))
        BEP2 = xb / w2             * (1 - ybe / h2)

    Args:
        box1: Reference box, shape (1, 4).
        box2: Boxes compared against ``box1``, shape (n, 4).
        xywh: If True boxes are (x_center, y_center, w, h), otherwise
            (x1, y1, x2, y2).
        eps: Small constant guarding the divisions.
        bep1: If True return BEP1, otherwise BEP2.

    Returns:
        Tensor of BEP values. The horizontal factor is 0 for boxes without
        horizontal overlap; the vertical factor is not clamped and becomes
        negative when the bottom edges are further apart than the reference
        height.
    """
    # Get the coordinates of bounding boxes
    if xywh:  # transform from xywh to xyxy (only x extents and bottom y are needed)
        (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
        b1_x1, b1_x2 = x1 - w1 / 2, x1 + w1 / 2
        b2_x1, b2_x2 = x2 - w2 / 2, x2 + w2 / 2
        b1_y2, b2_y2 = y1 + h1 / 2, y2 + h2 / 2
    else:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clamp(eps)
        w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clamp(eps)

    # Horizontal overlap of the bottom edges. min(right edges) - max(left
    # edges) is also correct when one box horizontally contains the other,
    # which min(b2_x2 - b1_x1, b1_x2 - b2_x1) is not; disjoint boxes get 0.
    xb = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    # Vertical distance between the bottom edges
    ybe = torch.abs(b2_y2 - b1_y2)

    if bep1:
        # xa + xb + xc == w1 + w2 - xb: the union of the two bottom edges
        x_term = xb / (w1 + w2 - xb + eps)
        y_term = 1 - ybe / (torch.max(h2, h1) + eps)
    else:
        # xa + xb == w2: the width of the second box
        x_term = xb / (w2 + eps)
        y_term = 1 - ybe / (h2 + eps)
    return x_term * y_term
def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """
    Compute IoU (or its GIoU / DIoU / CIoU variant) of box1(1,4) against box2(n,4).

    Args:
        box1: Reference box, shape (1, 4).
        box2: Boxes compared against ``box1``, shape (n, 4).
        xywh: If True boxes are (x_center, y_center, w, h), otherwise
            (x1, y1, x2, y2).
        GIoU, DIoU, CIoU: Select the generalized, distance or complete IoU;
            CIoU takes precedence over DIoU, which takes precedence over GIoU.
        eps: Small constant guarding the divisions.

    Returns:
        Tensor with one value per box in ``box2``.
    """
    if xywh:
        # Centre/size form: derive the corner coordinates.
        cx1, cy1, w1, h1 = box1.chunk(4, -1)
        cx2, cy2, w2, h2 = box2.chunk(4, -1)
        half_w1, half_h1 = w1 / 2, h1 / 2
        half_w2, half_h2 = w2 / 2, h2 / 2
        b1_x1, b1_x2 = cx1 - half_w1, cx1 + half_w1
        b1_y1, b1_y2 = cy1 - half_h1, cy1 + half_h1
        b2_x1, b2_x2 = cx2 - half_w2, cx2 + half_w2
        b2_y1, b2_y2 = cy2 - half_h2, cy2 + half_h2
    else:
        # Corner form: derive widths and (eps-clamped) heights.
        b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1)
        b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1)
        w1 = b1_x2 - b1_x1
        h1 = (b1_y2 - b1_y1).clamp(eps)
        w2 = b2_x2 - b2_x1
        h2 = (b2_y2 - b2_y1).clamp(eps)

    # Intersection area: overlap extents, each floored at zero
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union area
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if not (CIoU or DIoU or GIoU):
        return iou  # plain IoU

    # Smallest box enclosing both inputs
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

    if not (CIoU or DIoU):
        # GIoU https://arxiv.org/pdf/1902.09630.pdf
        c_area = cw * ch + eps
        return iou - (c_area - union) / c_area

    # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
    c2 = cw**2 + ch**2 + eps  # squared diagonal of the enclosing box
    dx = b2_x1 + b2_x2 - b1_x1 - b1_x2
    dy = b2_y1 + b2_y2 - b1_y1 - b1_y2
    rho2 = (dx ** 2 + dy ** 2) / 4  # squared distance between the centres
    if not CIoU:
        return iou - rho2 / c2  # DIoU

    # Aspect-ratio penalty
    # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
    v = (4 / math.pi**2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2)
    with torch.no_grad():
        alpha = v / (v - iou + (1 + eps))
    return iou - (rho2 / c2 + v * alpha)  # CIoU
class BoxMetrics:
    """Accumulates per-frame boxes and reports IoU, BEP and signed box errors.

    Frames are queued with :meth:`add_batch` and evaluated with
    :meth:`compute`. For every ground-truth box, the IoU and bottom-edge
    proximity (BEP) against the predictions of the same frame are collected,
    along with signed (prediction minus ground truth) differences in
    bottom-edge centre x, bottom-edge y, width and height.

    Args:
        iou_thres: IoU a prediction must exceed for its IoU/BEP values to be
            included in ``iou_mean`` / ``bep_mean``.
        match_iou_thres: IoU a prediction must exceed to contribute to the
            position/size error statistics. The two defaults reproduce the
            thresholds that were previously hard-coded.
    """

    # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
    def __init__(self, iou_thres=0.0, match_iou_thres=0.1):
        self.iou_thres = iou_thres
        self.match_iou_thres = match_iou_thres
        self.preds_tm = []
        self.target_tm = []
        self.bottom_x = []
        self.bottom_y = []
        self.widths = []
        self.heights = []
        self.ious = []
        self.beps = []

    def add_batch(self, preds, target):
        """
        Queue per-frame predictions and ground truth for a later ``compute()``.

        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            preds: sequence of per-frame tensors (Array[N, 6]),
                x1, y1, x2, y2, conf, class
            target: sequence of per-frame tensors (Array[M, 5]),
                class, x1, y1, x2, y2
        Returns:
            None, the frames are appended to the stored lists
        Raises:
            ValueError: if ``preds`` and ``target`` hold different numbers of
                frames, which would misalign every frame added afterwards
        """
        preds, target = list(preds), list(target)
        if len(preds) != len(target):
            raise ValueError(
                f"preds and target must contain the same number of frames, "
                f"got {len(preds)} and {len(target)}."
            )
        self.preds_tm.extend(preds)
        self.target_tm.extend(target)

    def compute(self):
        """
        Computes bbox iou, bep and location/size statistics

        The accumulators are rebuilt from the stored frames on every call, so
        calling ``compute()`` repeatedly returns the same result.

        Returns:
            dict with ``iou_mean``, ``bep_mean`` and the mean/std of the
            signed errors (``bottom_x``, ``bottom_y``, ``widths``,
            ``heights``). Values are NaN when nothing was matched.
        Raises:
            ValueError: if a retained IoU or BEP value is not positive
        """
        accumulators = (
            self.ious, self.beps,
            self.bottom_x, self.bottom_y, self.widths, self.heights,
        )
        for values in accumulators:
            values.clear()

        for target_frame, pred_frame in zip(self.target_tm, self.preds_tm):
            target_boxes = target_frame[:, 1:]  # drop the leading class column
            pred_boxes = pred_frame[:, :4]  # drop trailing conf and class
            if pred_boxes.shape[0] == 0 or target_boxes.shape[0] == 0:
                continue
            for t_box in target_boxes:
                iou = bbox_iou(t_box.unsqueeze(0), pred_boxes, xywh=False).squeeze(1)
                bep = bbox_bep(t_box.unsqueeze(0), pred_boxes, xywh=False).squeeze(1)
                matches = pred_boxes[iou > self.match_iou_thres]
                keep = iou > self.iou_thres
                iou, bep = iou[keep], bep[keep]
                # Sanity checks: only reachable with a negative threshold or
                # an unexpected BEP value.
                if torch.any(iou <= 0):
                    raise ValueError("IoU values must be greater than 0.")
                if torch.any(bep <= 0):
                    raise ValueError("BEP values must be greater than 0.")
                self.ious.extend(iou.tolist())
                self.beps.extend(bep.tolist())

                tx1, ty1, tx2, ty2 = t_box.tolist()
                for px1, py1, px2, py2 in matches.tolist():
                    # All errors are prediction minus ground truth.
                    self.bottom_x.append((px1 + px2) / 2 - (tx1 + tx2) / 2)
                    self.bottom_y.append(py2 - ty2)
                    self.widths.append((px2 - px1) - (tx2 - tx1))
                    self.heights.append((py2 - py1) - (ty2 - ty1))

        return {"iou_mean": self._mean(self.ious),
                "bep_mean": self._mean(self.beps),
                "bottom_x_std": self._std(self.bottom_x),
                "bottom_y_std": self._std(self.bottom_y),
                "widths_std": self._std(self.widths),
                "heights_std": self._std(self.heights),
                "bottom_x_mean": self._mean(self.bottom_x),
                "bottom_y_mean": self._mean(self.bottom_y),
                "widths_mean": self._mean(self.widths),
                "heights_mean": self._mean(self.heights)}

    @staticmethod
    def _mean(values):
        """Mean of ``values``; NaN (without a numpy warning) when empty."""
        return np.mean(values) if values else float("nan")

    @staticmethod
    def _std(values):
        """Standard deviation of ``values``; NaN (without a warning) when empty."""
        return np.std(values) if values else float("nan")