| | |
| | |
| | import numpy as np |
| | import numpy.typing as npt |
| | from typing import List, Tuple, Optional |
| |
|
| |
|
def expand_boxes(
    boxes: npt.NDArray[np.float64],
    r_x: Tuple[float, float] = (1, 1),
    r_y: Tuple[float, float] = (1, 1),
    size_agnostic: bool = True,
) -> npt.NDArray[np.float64]:
    """
    Expands bounding boxes by a specified ratio.
    Expected box format is normalized [x_min, y_min, x_max, y_max].

    The input array is not modified; a new array is returned.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        r_x (tuple, optional): Left, right expansion ratios. Defaults to (1, 1) (no expansion).
        r_y (tuple, optional): Up, down expansion ratios. Defaults to (1, 1) (no expansion).
        size_agnostic (bool, optional): If True, the expansion amount is ``ratio - 1``
            regardless of the box size; if False, it is scaled by the box width/height.
            Defaults to True.

    Returns:
        numpy.ndarray: Adjusted bounding boxes clipped to the [0, 1] range.
    """
    old_boxes = boxes.copy()
    # Work on a private copy: the in-place arithmetic below must not leak
    # into the caller's array.
    boxes = boxes.copy()

    if size_agnostic:
        h, w = 1, 1
    else:
        h = boxes[:, 3] - boxes[:, 1]
        w = boxes[:, 2] - boxes[:, 0]

    boxes[:, 0] -= w * (r_x[0] - 1)
    boxes[:, 2] += w * (r_x[1] - 1)
    boxes[:, 1] -= h * (r_y[0] - 1)
    boxes[:, 3] += h * (r_y[1] - 1)

    boxes = np.clip(boxes, 0, 1)

    # Expansion can make boxes overlap that (almost) did not overlap before.
    # For each such pair, pull the vertical edges back so the pair is separated
    # again along the y axis.
    n_boxes = len(boxes)
    for i in range(n_boxes):
        for j in range(i + 1, n_boxes):
            iou = bb_iou_array(boxes[i][None], boxes[j])[0]
            old_iou = bb_iou_array(old_boxes[i][None], old_boxes[j])[0]

            if iou > 0.05 and old_iou < 0.1:
                if boxes[i, 1] < boxes[j, 1]:
                    # Box i starts above box j: trim the top of j, never
                    # going below its original top edge.
                    boxes[j, 1] = min(old_boxes[j, 1], boxes[i, 3])
                    if old_iou > 0:
                        # The originals already touched: let i reach j's new top.
                        boxes[i, 3] = max(old_boxes[i, 3], boxes[j, 1])
                else:
                    # Mirror case: box j starts above (or level with) box i.
                    boxes[i, 1] = min(old_boxes[i, 1], boxes[j, 3])
                    if old_iou > 0:
                        boxes[j, 3] = max(old_boxes[j, 3], boxes[i, 1])

    return boxes
| |
|
| |
|
def merge_boxes(
    b1: npt.NDArray[np.float64], b2: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Builds the smallest box that contains both input boxes.

    Args:
        b1 (numpy.ndarray): First bounding box [x_min, y_min, x_max, y_max].
        b2 (numpy.ndarray): Second bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: A copy of b1 whose corners are stretched to also cover b2.
    """
    merged = b1.copy()
    # Axis 0 is x, axis 1 is y; the matching max coordinate sits two slots later.
    for axis in (0, 1):
        far = axis + 2
        merged[axis] = min(b1[axis], b2[axis])
        merged[far] = max(b1[far], b2[far])
    return merged
| |
|
| |
|
def bb_iou_array(
    boxes: npt.NDArray[np.float64], new_box: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Calculates the Intersection over Union (IoU) between a box and an array of boxes.

    Pairs whose union area is exactly zero (both boxes are degenerate) get an
    IoU of 0 instead of a NaN produced by a 0 / 0 division.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        new_box (numpy.ndarray): A single bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: Array of IoU values between the new_box and each box in the array.
    """
    # Corners of the intersection rectangle.
    x_left = np.maximum(boxes[:, 0], new_box[0])
    y_top = np.maximum(boxes[:, 1], new_box[1])
    x_right = np.minimum(boxes[:, 2], new_box[2])
    y_bottom = np.minimum(boxes[:, 3], new_box[3])

    # Negative extents mean "no overlap", hence the clamp to 0.
    inter_area = np.maximum(x_right - x_left, 0) * np.maximum(y_bottom - y_top, 0)

    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    new_box_area = (new_box[2] - new_box[0]) * (new_box[3] - new_box[1])

    union_area = boxes_area + new_box_area - inter_area

    # Divide only where the union is non-zero; a placeholder denominator of 1
    # avoids the divide-by-zero warning for the entries overwritten below.
    has_union = union_area != 0
    safe_union = np.where(has_union, union_area, 1)
    iou = np.where(has_union, inter_area / safe_union, 0.0)

    return iou
| |
|
| |
|
def match_with_title(
    box: npt.NDArray[np.float64],
    title_boxes: npt.NDArray[np.float64],
    match_dist: float = 0.1,
    delta: float = 1.,
    already_matched: Optional[List[int]] = None,
) -> Tuple[Optional[npt.NDArray[np.float64]], Optional[List[int]]]:
    """
    Matches a bounding box with a title bounding box based on IoU or proximity.

    Args:
        box (numpy.ndarray): Bounding box to match with title [x_min, y_min, x_max, y_max].
        title_boxes (numpy.ndarray): Array of title bounding boxes with shape (N, 4).
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.
        delta (float, optional): Multiplier for matching several titles. Defaults to 1..
        already_matched (list, optional): Indices (into title_boxes) of titles that are
            already matched and must not be matched again. Defaults to None (no exclusions).

    Returns:
        tuple: If matched, returns (merged_bbox, matched_title_indices), where
            merged_bbox covers the box and every matched title, and
            matched_title_indices are indices into title_boxes.
            If no match is found, returns None, None.
    """
    if not len(title_boxes):
        return None, None

    # Vertical gap: title bottom vs. box top, or box bottom vs. title top.
    dist_above = np.abs(title_boxes[:, 3] - box[1])
    dist_below = np.abs(box[3] - title_boxes[:, 1])

    # Horizontal misalignment: left edges, or box centers.
    dist_left = np.abs(title_boxes[:, 0] - box[0])
    dist_center = np.abs(title_boxes[:, 0] + title_boxes[:, 2] - box[0] - box[2]) / 2

    # Horizontal misalignment counts half as much as the vertical gap.
    dists = np.min([dist_above, dist_below], 0) + np.min([dist_left, dist_center], 0) / 2

    # Titles overlapping the box get a distance that is guaranteed to be below
    # match_dist (and no larger than the current best distance).
    ious = bb_iou_array(title_boxes, box)
    dists = np.where(ious > 0, min(match_dist - 0.01, np.min(dists)) / delta, dists)

    # Push already-used titles far outside the matching radius.
    if already_matched is not None and len(already_matched):
        dists[already_matched] = match_dist * 10

    best_dist = np.min(dists)
    if not best_dist <= match_dist:
        return None, None

    matches = np.where(dists <= min(match_dist, best_dist * delta))[0]
    if not len(matches):
        # Only reachable when delta < 1 shrinks the threshold below the best
        # distance; report "no match" rather than a match with no titles.
        return None, None

    new_bbox = box
    for match in matches:
        new_bbox = merge_boxes(new_bbox, title_boxes[match])
    return new_bbox, matches.tolist()
| |
|
| |
|
def match_boxes_with_title(
    boxes: npt.NDArray[np.float64],
    confs: npt.NDArray[np.float64],
    labels: npt.NDArray[np.int_],
    classes: List[str],
    to_match_labels: Optional[List[str]] = None,
    remove_matched_titles: bool = False,
    match_dist: float = 0.1,
) -> Tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.int_],
    List[int],
]:
    """
    Matches charts with title.

    The arrays are first reordered so that all title boxes come last; the
    returned arrays follow this new order.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (N,).
        labels (numpy.ndarray): Array of labels with shape (N,).
        classes (list): List of class names. Must contain "title".
        to_match_labels (list, optional): List of class names to match with titles.
            Defaults to None, which means ["chart"].
        remove_matched_titles (bool): Whether to remove matched titles from the boxes.
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.

    Returns:
        boxes (numpy.ndarray): Array of bounding boxes with shape (M, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (M,).
        labels (numpy.ndarray): Array of labels with shape (M,).
        found_title (list): Indices, in the reordered arrays, of the boxes that
            were merged with at least one title.

    Raises:
        ValueError: If "title" or one of to_match_labels is not in classes.
    """
    if to_match_labels is None:
        to_match_labels = ["chart"]

    title_label = classes.index("title")

    # Put titles at the end.
    title_ids = np.where(labels == title_label)[0]
    order = np.concatenate([np.delete(np.arange(len(boxes)), title_ids), title_ids])
    boxes = boxes[order]
    confs = confs[order]
    labels = labels[order]

    # Recompute positions in the reordered arrays.
    title_ids = np.where(labels == title_label)[0]
    to_match = np.where(np.isin(labels, [classes.index(c) for c in to_match_labels]))[0]

    # already_matched holds positions inside title_ids, not inside boxes.
    found_title, already_matched = [], []
    for i in to_match:
        merged_box, matched_title_ids = match_with_title(
            boxes[i],
            boxes[title_ids],
            already_matched=already_matched,
            match_dist=match_dist,
        )
        if matched_title_ids is not None:
            boxes[i] = merged_box
            already_matched += matched_title_ids
            found_title.append(int(i))

    if remove_matched_titles and len(already_matched):
        boxes = np.delete(boxes, title_ids[already_matched], axis=0)
        confs = np.delete(confs, title_ids[already_matched], axis=0)
        labels = np.delete(labels, title_ids[already_matched], axis=0)

    return boxes, confs, labels, found_title
| |
|