File size: 8,336 Bytes
ab35335 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
import numpy as np
import numpy.typing as npt
from typing import List, Tuple, Optional
def expand_boxes(
    boxes: npt.NDArray[np.float64],
    r_x: Tuple[float, float] = (1, 1),
    r_y: Tuple[float, float] = (1, 1),
    size_agnostic: bool = True,
) -> npt.NDArray[np.float64]:
    """
    Expands bounding boxes by a specified ratio.
    Expected box format is normalized [x_min, y_min, x_max, y_max].

    The input array is not modified; a new array is returned.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        r_x (tuple, optional): Left, right expansion ratios. Defaults to (1, 1) (no expansion).
        r_y (tuple, optional): Up, down expansion ratios. Defaults to (1, 1) (no expansion).
        size_agnostic (bool, optional): Expand independently of the box shape. Defaults to True.
            When True, a ratio ``r`` adds a fixed margin of ``r - 1`` on that side.
            When False, the margin is ``(r - 1)`` times the box's own width / height.

    Returns:
        numpy.ndarray: Adjusted bounding boxes clipped to the [0, 1] range.
    """
    # The caller's array is only read from here on; all writes go to a private copy.
    old_boxes = boxes
    boxes = boxes.copy()

    if size_agnostic:
        h, w = 1, 1
    else:
        h = old_boxes[:, 3] - old_boxes[:, 1]
        w = old_boxes[:, 2] - old_boxes[:, 0]

    boxes[:, 0] -= w * (r_x[0] - 1)  # left
    boxes[:, 2] += w * (r_x[1] - 1)  # right
    boxes[:, 1] -= h * (r_y[0] - 1)  # up
    boxes[:, 3] += h * (r_y[1] - 1)  # down

    boxes = np.clip(boxes, 0, 1)

    # Enforce non-overlapping boxes: a pair that (almost) did not overlap before
    # the expansion but overlaps afterwards is pulled apart along the y axis.
    # The IoU is recomputed on the current state, so earlier fixes are accounted for.
    n_boxes = len(boxes)
    for i in range(n_boxes):
        for j in range(i + 1, n_boxes):
            iou = bb_iou_array(boxes[i][None], boxes[j])[0]
            old_iou = bb_iou_array(old_boxes[i][None], old_boxes[j])[0]

            if not (iou > 0.05 and old_iou < 0.1):
                continue

            if boxes[i, 1] < boxes[j, 1]:  # i above j
                # Top of j goes back to its original top or the bottom of i, whichever is higher
                boxes[j, 1] = min(old_boxes[j, 1], boxes[i, 3])
                if old_iou > 0:
                    boxes[i, 3] = max(old_boxes[i, 3], boxes[j, 1])
            else:  # j above (or level with) i
                boxes[i, 1] = min(old_boxes[i, 1], boxes[j, 3])
                if old_iou > 0:
                    boxes[j, 3] = max(old_boxes[j, 3], boxes[i, 1])

    return boxes
def merge_boxes(
    b1: npt.NDArray[np.float64], b2: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Builds the smallest box that contains both input boxes.

    Args:
        b1 (numpy.ndarray): First bounding box [x_min, y_min, x_max, y_max].
        b2 (numpy.ndarray): Second bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: A copy of b1 whose corners are stretched to also cover b2.
    """
    merged = b1.copy()
    # The top-left corner takes the smaller coordinate, the bottom-right the larger one
    for coord, pick in ((0, min), (1, min), (2, max), (3, max)):
        merged[coord] = pick(b1[coord], b2[coord])
    return merged
def bb_iou_array(
    boxes: npt.NDArray[np.float64], new_box: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Calculates the Intersection over Union (IoU) between a box and an array of boxes.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        new_box (numpy.ndarray): A single bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: Array of IoU values between the new_box and each box in the array.
            Pairs whose union area is not positive (e.g. two zero-area boxes) get an
            IoU of 0 instead of NaN.
    """
    # Corners of the intersection rectangle
    x_left = np.maximum(boxes[:, 0], new_box[0])
    y_top = np.maximum(boxes[:, 1], new_box[1])
    x_right = np.minimum(boxes[:, 2], new_box[2])
    y_bottom = np.minimum(boxes[:, 3], new_box[3])

    # Disjoint boxes give negative extents, which are clamped to an empty intersection
    inter_area = np.maximum(x_right - x_left, 0) * np.maximum(y_bottom - y_top, 0)

    # Areas of each candidate box and of the reference box
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    new_box_area = (new_box[2] - new_box[0]) * (new_box[3] - new_box[1])
    union_area = boxes_area + new_box_area - inter_area

    # Degenerate pairs would evaluate 0 / 0: silence the warning and report no overlap
    with np.errstate(divide="ignore", invalid="ignore"):
        iou = inter_area / union_area
    return np.where(union_area > 0, iou, 0.0)
def match_with_title(
    box: npt.NDArray[np.float64],
    title_boxes: npt.NDArray[np.float64],
    match_dist: float = 0.1,
    delta: float = 1.,
    already_matched: Optional[List[int]] = None,
) -> Tuple[Optional[npt.NDArray[np.float64]], Optional[List[int]]]:
    """
    Matches a bounding box with a title bounding box based on IoU or proximity.

    The distance to a title is the smaller of the two vertical gaps (title bottom to
    box top, box bottom to title top) plus half of the smaller horizontal offset
    (left edges or centers). Titles overlapping the box (IoU > 0) get a distance
    that is always within match_dist.

    Args:
        box (numpy.ndarray): Bounding box to match with title [x_min, y_min, x_max, y_max].
        title_boxes (numpy.ndarray): Array of title bounding boxes with shape (N, 4).
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.
        delta (float, optional): Multiplier for matching several titles: every title
            within delta times the best distance (and within match_dist) is merged.
            Defaults to 1..
        already_matched (list, optional): Indices (into title_boxes) of titles that
            were already matched and must be ignored. Defaults to None (no title excluded).

    Returns:
        tuple: If matched, returns a tuple of (merged_bbox, matched_title_indices), where
            matched_title_indices are positions in title_boxes.
            If no match is found, returns None, None.
    """
    if len(title_boxes) == 0:
        return None, None

    # Vertical gap: title right above or right below the box
    dist_above = np.abs(title_boxes[:, 3] - box[1])
    dist_below = np.abs(box[3] - title_boxes[:, 1])

    # Horizontal offset: title left-aligned or centered with respect to the box
    dist_left = np.abs(title_boxes[:, 0] - box[0])
    dist_center = np.abs(title_boxes[:, 0] + title_boxes[:, 2] - box[0] - box[2]) / 2

    dists = np.minimum(dist_above, dist_below) + np.minimum(dist_left, dist_center) / 2

    # Overlapping titles are forced under the matching threshold
    ious = bb_iou_array(title_boxes, box)
    dists = np.where(ious > 0, min(match_dist - 0.01, np.min(dists)) / delta, dists)

    if already_matched is not None and len(already_matched):
        dists[already_matched] = match_dist * 10  # Remove already matched titles

    best_dist = np.min(dists)
    if best_dist <= match_dist:
        matches = np.where(dists <= min(match_dist, best_dist * delta))[0]

        new_bbox = box
        for match in matches:
            new_bbox = merge_boxes(new_bbox, title_boxes[match])
        return new_bbox, list(matches)

    return None, None
def match_boxes_with_title(
    boxes: npt.NDArray[np.float64],
    confs: npt.NDArray[np.float64],
    labels: npt.NDArray[np.int_],
    classes: List[str],
    to_match_labels: Optional[List[str]] = None,
    remove_matched_titles: bool = False,
    match_dist: float = 0.1,
) -> Tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.int_],
    List[int],
]:
    """
    Matches charts with title.

    The detections are first reordered so that titles come last. Each box whose
    class is in to_match_labels is then merged with its matching title(s), and a
    title can only be matched once. The input arrays are not modified.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (N,).
        labels (numpy.ndarray): Array of labels with shape (N,).
        classes (list): List of class names. Must contain "title".
        to_match_labels (list, optional): List of class names to match with titles.
            Defaults to None, which means ["chart"].
        remove_matched_titles (bool): Whether to remove matched titles from the boxes.
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.

    Returns:
        boxes (numpy.ndarray): Array of bounding boxes with shape (M, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (M,).
        labels (numpy.ndarray): Array of labels with shape (M,).
        found_title (list): Indices, in the reordered arrays, of the boxes that were
            merged with at least one title.

    Raises:
        ValueError: If "title" or one of to_match_labels is not in classes.
    """
    if to_match_labels is None:
        to_match_labels = ["chart"]

    title_label = classes.index("title")

    # Put titles at the end
    title_ids = np.where(labels == title_label)[0]
    order = np.concatenate([np.delete(np.arange(len(boxes)), title_ids), title_ids])
    # Fancy indexing returns copies, so the merges below never touch the caller's arrays
    boxes = boxes[order]
    confs = confs[order]
    labels = labels[order]

    # Ids
    title_ids = np.where(labels == title_label)[0]
    to_match = np.where(np.isin(labels, [classes.index(c) for c in to_match_labels]))[0]

    # Matching
    found_title, already_matched = [], []
    for i in to_match:
        i = int(i)
        # already_matched holds positions within boxes[title_ids], not within boxes
        merged_box, matched_title_ids = match_with_title(
            boxes[i],
            boxes[title_ids],
            already_matched=already_matched,
            match_dist=match_dist,
        )
        if matched_title_ids is not None:
            boxes[i] = merged_box
            already_matched += matched_title_ids
            found_title.append(i)

    if remove_matched_titles and len(already_matched):
        matched_rows = title_ids[already_matched]
        boxes = np.delete(boxes, matched_rows, axis=0)
        confs = np.delete(confs, matched_rows, axis=0)
        labels = np.delete(labels, matched_rows, axis=0)

    return boxes, confs, labels, found_title
|