| | |
| | |
| |
|
| | import numpy as np |
| | import numpy.typing as npt |
| | from typing import Tuple, List |
| |
|
| |
|
def bb_iou_array(
    boxes: npt.NDArray[np.float64], new_box: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Calculates the Intersection over Union (IoU) between a box and an array of boxes.

    Boxes are expected in [x_min, y_min, x_max, y_max] format. Pairs whose
    union area is exactly zero (e.g. two zero-area boxes) get an IoU of 0
    instead of NaN/inf.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        new_box (numpy.ndarray): A single bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: Array of shape (N,) with the IoU between new_box and
        each box in the array.
    """
    # Accept any array-like input (lists, tuples) without copying real arrays.
    boxes = np.asarray(boxes)
    new_box = np.asarray(new_box)

    # Corners of the intersection rectangle for every (box, new_box) pair.
    inter_x_min = np.maximum(boxes[:, 0], new_box[0])
    inter_y_min = np.maximum(boxes[:, 1], new_box[1])
    inter_x_max = np.minimum(boxes[:, 2], new_box[2])
    inter_y_max = np.minimum(boxes[:, 3], new_box[3])

    # Negative extents mean the boxes do not overlap on that axis: clamp to 0.
    inter_area = np.maximum(inter_x_max - inter_x_min, 0) * np.maximum(
        inter_y_max - inter_y_min, 0
    )

    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    new_box_area = (new_box[2] - new_box[0]) * (new_box[3] - new_box[1])
    union_area = boxes_area + new_box_area - inter_area

    # A zero union would yield NaN/inf plus a RuntimeWarning; silence the
    # warning for the division and overwrite those entries with 0 below.
    with np.errstate(divide="ignore", invalid="ignore"):
        iou = inter_area / union_area
    iou[union_area == 0] = 0

    return iou
| |
|
| |
|
def expand_boxes(
    boxes: npt.NDArray[np.float64],
    r_x: Tuple[float, float] = (1, 1),
    r_y: Tuple[float, float] = (1, 1),
    size_agnostic: bool = True,
) -> npt.NDArray[np.float64]:
    """
    Expands bounding boxes by a specified ratio.
    Expected box format is normalized [x_min, y_min, x_max, y_max].

    Each side is moved outwards by ``(ratio - 1)`` times a reference size:
    the box's own width/height when ``size_agnostic`` is False, or 1 (the
    same absolute amount for every box) when it is True. After clipping to
    [0, 1], pairs of boxes that the expansion pushed into each other are
    pulled apart again along the y axis.

    The input array is not modified; a new array is returned.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        r_x (tuple, optional): Left, right expansion ratios. Defaults to (1, 1) (no expansion).
        r_y (tuple, optional): Up, down expansion ratios. Defaults to (1, 1) (no expansion).
        size_agnostic (bool, optional): Expand independently of the box shape. Defaults to True.

    Returns:
        numpy.ndarray: Adjusted bounding boxes clipped to the [0, 1] range.
    """
    # Work on a floating-point copy: the in-place updates below must neither
    # leak into the caller's array nor fail on integer-typed input.
    old_boxes = np.array(boxes)
    if not np.issubdtype(old_boxes.dtype, np.floating):
        old_boxes = old_boxes.astype(np.float64)
    boxes = old_boxes.copy()

    if not size_agnostic:
        h = boxes[:, 3] - boxes[:, 1]
        w = boxes[:, 2] - boxes[:, 0]
    else:
        h, w = 1, 1

    boxes[:, 0] -= w * (r_x[0] - 1)
    boxes[:, 2] += w * (r_x[1] - 1)
    boxes[:, 1] -= h * (r_y[0] - 1)
    boxes[:, 3] += h * (r_y[1] - 1)

    boxes = np.clip(boxes, 0, 1)

    # Undo overlaps introduced by the expansion. Pairs are visited in order
    # and each fix is applied immediately, so later pairs see earlier fixes.
    n_boxes = len(boxes)
    for i in range(n_boxes):
        for j in range(i + 1, n_boxes):
            iou = bb_iou_array(boxes[i][None], boxes[j])[0]
            old_iou = bb_iou_array(old_boxes[i][None], old_boxes[j])[0]

            # Only act when the expanded boxes overlap noticeably (> 0.05)
            # while the original boxes barely overlapped, if at all (< 0.1).
            if not (iou > 0.05 and old_iou < 0.1):
                continue

            if boxes[i, 1] < boxes[j, 1]:
                # Box i starts above box j: pull j's top edge back so it is
                # no higher than its original top or i's bottom edge.
                boxes[j, 1] = min(old_boxes[j, 1], boxes[i, 3])
                if old_iou > 0:
                    # The originals already touched: keep i's bottom at least
                    # at its original position / j's new top.
                    boxes[i, 3] = max(old_boxes[i, 3], boxes[j, 1])
            else:
                # Mirror case: box j starts above (or level with) box i.
                boxes[i, 1] = min(old_boxes[i, 1], boxes[j, 3])
                if old_iou > 0:
                    boxes[j, 3] = max(old_boxes[j, 3], boxes[i, 1])

    return boxes
| |
|
| |
|
def retrieve_title(
    boxes: npt.NDArray[np.float64],
    labels: npt.NDArray[np.int_],
    classes: List[str],
) -> npt.NDArray[np.int_]:
    """
    Recovers a missed chart title by relabeling the widest `other` box.

    If no box is labeled 'chart_title', the widest box labeled 'other'
    whose width exceeds 0.3 is relabeled as 'chart_title'. When a title is
    already present, when there are no boxes, or when no 'other' box is wide
    enough, the labels are returned unchanged.

    Note: ``labels`` is modified in place and the same array is returned.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4),
            in [x_min, y_min, x_max, y_max] format.
        labels (numpy.ndarray): Array of labels with shape (N,), given as
            indices into ``classes``.
        classes (list): List of class labels.

    Returns:
        numpy.ndarray [N]: Array of labels.

    Raises:
        ValueError: If 'chart_title' is missing from ``classes``, or if
            'other' is missing when a replacement has to be searched for.
    """
    title_id = classes.index("chart_title")

    # Nothing to do for empty input or when a title was already detected.
    if len(labels) == 0 or title_id in labels:
        return labels

    other_id = classes.index("other")
    widths = boxes[:, 2] - boxes[:, 0]

    # Score is the width for eligible boxes ('other' and wider than 0.3)
    # and 0 for everything else, so argmax picks the widest candidate.
    scores = widths * (labels == other_id) * (widths > 0.3)
    if scores.max() > 0:
        labels[np.argmax(scores)] = title_id

    return labels
| |
|