Spaces:

reputation
/

yolov3

Sleeping

App Files Community

reputation committed on Mar 15, 2024

Commit

d39299a

verified ·

1 Parent(s): 18d079b

Update utils.py

Browse files

Files changed (1) hide show

utils.py +2 -108

utils.py CHANGED Viewed

@@ -12,13 +12,6 @@ from tqdm import tqdm
 def iou_width_height(boxes1, boxes2):
-    """
-    Parameters:
-        boxes1 (tensor): width and height of the first bounding boxes
-        boxes2 (tensor): width and height of the second bounding boxes
-    Returns:
-        tensor: Intersection over union of the corresponding boxes
-    """
     intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
         boxes1[..., 1], boxes2[..., 1]
     )
@@ -29,21 +22,6 @@ def iou_width_height(boxes1, boxes2):
 def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
-    """
-    Video explanation of this function:
-    https://youtu.be/XXYG5ZWtjj0
-    This function calculates intersection over union (iou) given pred boxes
-    and target boxes.
-    Parameters:
-        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
-        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
-        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
-    Returns:
-        tensor: Intersection over union for all examples
-    """
     if box_format == "midpoint":
         box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
@@ -78,22 +56,6 @@ def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
 def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
-    """
-    Video explanation of this function:
-    https://youtu.be/YDkjWEN8jNA
-    Does Non Max Suppression given bboxes
-    Parameters:
-        bboxes (list): list of lists containing all bboxes with each bboxes
-        specified as [class_pred, prob_score, x1, y1, x2, y2]
-        iou_threshold (float): threshold where predicted bboxes is correct
-        threshold (float): threshold to remove predicted bboxes (independent of IoU)
-        box_format (str): "midpoint" or "corners" used to specify bboxes
-    Returns:
-        list: bboxes after performing NMS given a specific IoU threshold
-    """
     assert type(bboxes) == list
@@ -124,37 +86,15 @@ def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
 def mean_average_precision(
         pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
 ):
-    """
-    Video explanation of this function:
-    https://youtu.be/FppOzcDvaDI
-    This function calculates mean average precision (mAP)
-    Parameters:
-        pred_boxes (list): list of lists containing all bboxes with each bboxes
-        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
-        true_boxes (list): Similar as pred_boxes except all the correct ones
-        iou_threshold (float): threshold where predicted bboxes is correct
-        box_format (str): "midpoint" or "corners" used to specify bboxes
-        num_classes (int): number of classes
-    Returns:
-        float: mAP value across all classes given a specific IoU threshold
-    """
-    # list storing all AP for respective classes
     average_precisions = []
-    # used for numerical stability later on
     epsilon = 1e-6
     for c in range(num_classes):
         detections = []
         ground_truths = []
-        # Go through all predictions and targets,
-        # and only add the ones that belong to the
-        # current class c
         for detection in pred_boxes:
             if detection[1] == c:
                 detections.append(detection)
@@ -162,33 +102,19 @@ def mean_average_precision(
         for true_box in true_boxes:
             if true_box[1] == c:
                 ground_truths.append(true_box)
-        # find the amount of bboxes for each training example
-        # Counter here finds how many ground truth bboxes we get
-        # for each training example, so let's say img 0 has 3,
-        # img 1 has 5 then we will obtain a dictionary with:
-        # amount_bboxes = {0:3, 1:5}
         amount_bboxes = Counter([gt[0] for gt in ground_truths])
-        # We then go through each key, val in this dictionary
-        # and convert to the following (w.r.t same example):
-        # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
         for key, val in amount_bboxes.items():
             amount_bboxes[key] = torch.zeros(val)
-        # sort by box probabilities which is index 2
         detections.sort(key=lambda x: x[2], reverse=True)
         TP = torch.zeros((len(detections)))
         FP = torch.zeros((len(detections)))
         total_true_bboxes = len(ground_truths)
-        # If none exists for this class then we can safely skip
         if total_true_bboxes == 0:
             continue
         for detection_idx, detection in enumerate(detections):
-            # Only take out the ground_truths that have the same
-            # training idx as detection
             ground_truth_img = [
                 bbox for bbox in ground_truths if bbox[0] == detection[0]
             ]
@@ -208,15 +134,12 @@ def mean_average_precision(
                     best_gt_idx = idx
             if best_iou > iou_threshold:
-                # only detect ground truth detection once
                 if amount_bboxes[detection[0]][best_gt_idx] == 0:
-                    # true positive and add this bounding box to seen
                     TP[detection_idx] = 1
                     amount_bboxes[detection[0]][best_gt_idx] = 1
                 else:
                     FP[detection_idx] = 1
-            # if IOU is lower then the detection is a false positive
             else:
                 FP[detection_idx] = 1
@@ -233,22 +156,13 @@ def mean_average_precision(
 def plot_image(image, boxes):
-    """Plots predicted bounding boxes on the image"""
     cmap = plt.get_cmap("tab20b")
     class_labels = config.COCO_LABELS if config.DATASET == 'COCO' else config.PASCAL_CLASSES
     colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
     im = np.array(image)
     height, width, _ = im.shape
-    # Create figure and axes
     fig, ax = plt.subplots(1)
-    # Display the image
     ax.imshow(im)
-    # box[0] is x midpoint, box[2] is width
-    # box[1] is y midpoint, box[3] is height
-    # Create a Rectangle patch
     for box in boxes:
         assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
         class_pred = box[0]
@@ -263,7 +177,6 @@ def plot_image(image, boxes):
             edgecolor=colors[int(class_pred)],
             facecolor="none",
         )
-        # Add the patch to the Axes
         ax.add_patch(rect)
         plt.text(
             upper_left_x * width,
@@ -286,7 +199,6 @@ def get_evaluation_bboxes(
         box_format="midpoint",
         device="cuda",
 ):
-    # make sure model is in eval before get bboxes
     model.eval()
     train_idx = 0
     all_pred_boxes = []
@@ -308,7 +220,6 @@ def get_evaluation_bboxes(
             for idx, (box) in enumerate(boxes_scale_i):
                 bboxes[idx] += box
-        # we just want one bbox for each label, not one for each scale
         true_bboxes = cells_to_bboxes(
             labels[2], anchor, S=S, is_preds=False
         )
@@ -335,19 +246,6 @@ def get_evaluation_bboxes(
 def cells_to_bboxes(predictions, anchors, S, is_preds=True):
-    """
-    Scales the predictions coming from the model to
-    be relative to the entire image such that they for example later
-    can be plotted or.
-    INPUT:
-    predictions: tensor of size (N, 3, S, S, num_classes+5)
-    anchors: the anchors used for the predictions
-    S: the number of cells the image is divided in on the width (and height)
-    is_preds: whether the input is predictions or the true bounding boxes
-    OUTPUT:
-    converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
-                      object score, bounding box coordinates
-    """
     BATCH_SIZE = predictions.shape[0]
     num_anchors = len(anchors)
     box_predictions = predictions[..., 1:5]
@@ -387,8 +285,8 @@ def check_class_accuracy(model, loader, threshold):
         for i in range(3):
             y[i] = y[i].to(config.DEVICE)
-            obj = y[i][..., 0] == 1  # in paper this is Iobj_i
-            noobj = y[i][..., 0] == 0  # in paper this is Iobj_i
             correct_class += torch.sum(
                 torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
@@ -408,7 +306,6 @@ def check_class_accuracy(model, loader, threshold):
 def get_mean_std(loader):
-    # var[X] = E[X**2] - E[X]**2
     channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
     for data, _ in tqdm(loader):
@@ -436,9 +333,6 @@ def load_checkpoint(checkpoint_file, model, optimizer, lr):
     checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
     model.load_state_dict(checkpoint["state_dict"])
     optimizer.load_state_dict(checkpoint["optimizer"])
-    # If we don't do this then it will just have learning rate of old checkpoint
-    # and it will lead to many hours of debugging \:
     for param_group in optimizer.param_groups:
         param_group["lr"] = lr

 def iou_width_height(boxes1, boxes2):
     intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
         boxes1[..., 1], boxes2[..., 1]
     )
 def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
     if box_format == "midpoint":
         box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
 def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
     assert type(bboxes) == list
 def mean_average_precision(
         pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
 ):
     average_precisions = []
     epsilon = 1e-6
     for c in range(num_classes):
         detections = []
         ground_truths = []
         for detection in pred_boxes:
             if detection[1] == c:
                 detections.append(detection)
         for true_box in true_boxes:
             if true_box[1] == c:
                 ground_truths.append(true_box)
         amount_bboxes = Counter([gt[0] for gt in ground_truths])
         for key, val in amount_bboxes.items():
             amount_bboxes[key] = torch.zeros(val)
         detections.sort(key=lambda x: x[2], reverse=True)
         TP = torch.zeros((len(detections)))
         FP = torch.zeros((len(detections)))
         total_true_bboxes = len(ground_truths)
         if total_true_bboxes == 0:
             continue
         for detection_idx, detection in enumerate(detections):
             ground_truth_img = [
                 bbox for bbox in ground_truths if bbox[0] == detection[0]
             ]
                     best_gt_idx = idx
             if best_iou > iou_threshold:
                 if amount_bboxes[detection[0]][best_gt_idx] == 0:
                     TP[detection_idx] = 1
                     amount_bboxes[detection[0]][best_gt_idx] = 1
                 else:
                     FP[detection_idx] = 1
             else:
                 FP[detection_idx] = 1
 def plot_image(image, boxes):
     cmap = plt.get_cmap("tab20b")
     class_labels = config.COCO_LABELS if config.DATASET == 'COCO' else config.PASCAL_CLASSES
     colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
     im = np.array(image)
     height, width, _ = im.shape
     fig, ax = plt.subplots(1)
     ax.imshow(im)
     for box in boxes:
         assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
         class_pred = box[0]
             edgecolor=colors[int(class_pred)],
             facecolor="none",
         )
         ax.add_patch(rect)
         plt.text(
             upper_left_x * width,
         box_format="midpoint",
         device="cuda",
 ):
     model.eval()
     train_idx = 0
     all_pred_boxes = []
             for idx, (box) in enumerate(boxes_scale_i):
                 bboxes[idx] += box
         true_bboxes = cells_to_bboxes(
             labels[2], anchor, S=S, is_preds=False
         )
 def cells_to_bboxes(predictions, anchors, S, is_preds=True):
     BATCH_SIZE = predictions.shape[0]
     num_anchors = len(anchors)
     box_predictions = predictions[..., 1:5]
         for i in range(3):
             y[i] = y[i].to(config.DEVICE)
+            obj = y[i][..., 0] == 1
+            noobj = y[i][..., 0] == 0
             correct_class += torch.sum(
                 torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
 def get_mean_std(loader):
     channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
     for data, _ in tqdm(loader):
     checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
     model.load_state_dict(checkpoint["state_dict"])
     optimizer.load_state_dict(checkpoint["optimizer"])
     for param_group in optimizer.param_groups:
         param_group["lr"] = lr