Update utils.py
Browse files
utils.py
CHANGED
|
@@ -90,20 +90,15 @@ class UnionFind:
|
|
| 90 |
return labels
|
| 91 |
|
| 92 |
def visualise_single_image_prediction(image_as_np_array, predictions, filename):
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
subplot.imshow(image_as_np_array)
|
| 95 |
plot_bboxes(subplot, predictions["panels"], color="green")
|
| 96 |
-
plot_bboxes(subplot, predictions["texts"], color="red",
|
| 97 |
plot_bboxes(subplot, predictions["characters"], color="blue")
|
| 98 |
-
plot_bboxes(subplot, predictions["tails"], color="purple")
|
| 99 |
-
|
| 100 |
-
for i, name in enumerate(predictions["character_names"]):
|
| 101 |
-
char_bbox = predictions["characters"][i]
|
| 102 |
-
x1, y1, x2, y2 = char_bbox
|
| 103 |
-
subplot.text(x1, y1 - 2, name,
|
| 104 |
-
verticalalignment='bottom', horizontalalignment='left',
|
| 105 |
-
bbox=dict(facecolor='blue', alpha=1, edgecolor='none'), # Background settings
|
| 106 |
-
color='white', fontsize=8)
|
| 107 |
|
| 108 |
COLOURS = [
|
| 109 |
"#b7ff51", # green
|
|
@@ -146,24 +141,14 @@ def visualise_single_image_prediction(image_as_np_array, predictions, filename):
|
|
| 146 |
subplot.plot([x2], [y2], color=random_colour, marker="o", markersize=5)
|
| 147 |
|
| 148 |
for (i, j) in predictions["text_character_associations"]:
|
|
|
|
| 149 |
bbox_i = predictions["texts"][i]
|
| 150 |
bbox_j = predictions["characters"][j]
|
| 151 |
-
if not predictions["is_essential_text"][i]:
|
| 152 |
-
continue
|
| 153 |
-
x1 = bbox_i[0] + (bbox_i[2] - bbox_i[0]) / 2
|
| 154 |
-
y1 = bbox_i[1] + (bbox_i[3] - bbox_i[1]) / 2
|
| 155 |
-
x2 = bbox_j[0] + (bbox_j[2] - bbox_j[0]) / 2
|
| 156 |
-
y2 = bbox_j[1] + (bbox_j[3] - bbox_j[1]) / 2
|
| 157 |
-
subplot.plot([x1, x2], [y1, y2], color="red", linewidth=2, linestyle="dashed")
|
| 158 |
-
|
| 159 |
-
for (i, j) in predictions["text_tail_associations"]:
|
| 160 |
-
bbox_i = predictions["texts"][i]
|
| 161 |
-
bbox_j = predictions["tails"][j]
|
| 162 |
x1 = bbox_i[0] + (bbox_i[2] - bbox_i[0]) / 2
|
| 163 |
y1 = bbox_i[1] + (bbox_i[3] - bbox_i[1]) / 2
|
| 164 |
x2 = bbox_j[0] + (bbox_j[2] - bbox_j[0]) / 2
|
| 165 |
y2 = bbox_j[1] + (bbox_j[3] - bbox_j[1]) / 2
|
| 166 |
-
subplot.plot([x1, x2], [y1, y2], color="
|
| 167 |
|
| 168 |
subplot.axis("off")
|
| 169 |
if filename is not None:
|
|
@@ -174,18 +159,17 @@ def visualise_single_image_prediction(image_as_np_array, predictions, filename):
|
|
| 174 |
plt.close()
|
| 175 |
return image
|
| 176 |
|
| 177 |
-
def plot_bboxes(subplot, bboxes, color="red",
|
| 178 |
-
if visibility is None:
|
| 179 |
-
visibility = [1] * len(bboxes)
|
| 180 |
for id, bbox in enumerate(bboxes):
|
| 181 |
-
if visibility[id] == 0:
|
| 182 |
-
continue
|
| 183 |
w = bbox[2] - bbox[0]
|
| 184 |
h = bbox[3] - bbox[1]
|
| 185 |
rect = patches.Rectangle(
|
| 186 |
bbox[:2], w, h, linewidth=1, edgecolor=color, facecolor="none", linestyle="solid"
|
| 187 |
)
|
| 188 |
subplot.add_patch(rect)
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
def sort_panels(rects):
|
| 191 |
before_rects = convert_to_list_of_lists(rects)
|
|
@@ -334,44 +318,27 @@ def merge_overlapping_ranges(ranges):
|
|
| 334 |
return merged_ranges
|
| 335 |
|
| 336 |
def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
|
| 337 |
-
"""
|
| 338 |
-
Sort text boxes in reading order (left-to-right, top-to-bottom) within panels.
|
| 339 |
-
|
| 340 |
-
Args:
|
| 341 |
-
text_bboxes: List of text bounding boxes [x1, y1, x2, y2].
|
| 342 |
-
sorted_panel_bboxes: List of panel bounding boxes, assumed to be sorted in reading order.
|
| 343 |
-
|
| 344 |
-
Returns:
|
| 345 |
-
List of indices of text boxes sorted in reading order.
|
| 346 |
-
"""
|
| 347 |
text_bboxes = convert_to_list_of_lists(text_bboxes)
|
| 348 |
sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)
|
| 349 |
|
| 350 |
if len(text_bboxes) == 0:
|
| 351 |
return []
|
| 352 |
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
# Step 2: Sort text boxes first by their panel and then within each panel
|
| 357 |
-
text_indices = list(range(len(text_bboxes)))
|
| 358 |
-
# Sort by panel ID first
|
| 359 |
-
text_indices.sort(key=lambda i: panel_id_for_text[i])
|
| 360 |
-
|
| 361 |
-
# Step 3: Group by panel ID and sort within each panel
|
| 362 |
-
from itertools import groupby
|
| 363 |
-
|
| 364 |
-
sorted_indices = []
|
| 365 |
-
for panel_id, group in groupby(text_indices, key=lambda i: panel_id_for_text[i]):
|
| 366 |
-
group_indices = list(group)
|
| 367 |
-
text_bboxes_within_panel = [text_bboxes[i] for i in group_indices]
|
| 368 |
-
|
| 369 |
-
# Sort within the panel (left-to-right, top-to-bottom)
|
| 370 |
-
sorted_within_panel = sort_texts_within_panel(text_bboxes_within_panel)
|
| 371 |
-
sorted_indices.extend([group_indices[i] for i in sorted_within_panel])
|
| 372 |
-
|
| 373 |
-
return sorted_indices
|
| 374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
|
| 377 |
text_to_panel_mapping = []
|
|
@@ -394,10 +361,24 @@ def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
|
|
| 394 |
return text_to_panel_mapping
|
| 395 |
|
| 396 |
def sort_texts_within_panel(rects):
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
def x1y1wh_to_x1y1x2y2(bbox):
|
| 403 |
x1, y1, w, h = bbox
|
|
|
|
| 90 |
return labels
|
| 91 |
|
| 92 |
def visualise_single_image_prediction(image_as_np_array, predictions, filename):
|
| 93 |
+
h, w = image_as_np_array.shape[:2]
|
| 94 |
+
if h > w:
|
| 95 |
+
figure, subplot = plt.subplots(1, 1, figsize=(10, 10 * h / w))
|
| 96 |
+
else:
|
| 97 |
+
figure, subplot = plt.subplots(1, 1, figsize=(10 * w / h, 10))
|
| 98 |
subplot.imshow(image_as_np_array)
|
| 99 |
plot_bboxes(subplot, predictions["panels"], color="green")
|
| 100 |
+
plot_bboxes(subplot, predictions["texts"], color="red", add_index=True)
|
| 101 |
plot_bboxes(subplot, predictions["characters"], color="blue")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
COLOURS = [
|
| 104 |
"#b7ff51", # green
|
|
|
|
| 141 |
subplot.plot([x2], [y2], color=random_colour, marker="o", markersize=5)
|
| 142 |
|
| 143 |
for (i, j) in predictions["text_character_associations"]:
|
| 144 |
+
score = predictions["dialog_confidences"][i]
|
| 145 |
bbox_i = predictions["texts"][i]
|
| 146 |
bbox_j = predictions["characters"][j]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
x1 = bbox_i[0] + (bbox_i[2] - bbox_i[0]) / 2
|
| 148 |
y1 = bbox_i[1] + (bbox_i[3] - bbox_i[1]) / 2
|
| 149 |
x2 = bbox_j[0] + (bbox_j[2] - bbox_j[0]) / 2
|
| 150 |
y2 = bbox_j[1] + (bbox_j[3] - bbox_j[1]) / 2
|
| 151 |
+
subplot.plot([x1, x2], [y1, y2], color="red", linewidth=2, linestyle="dashed", alpha=score)
|
| 152 |
|
| 153 |
subplot.axis("off")
|
| 154 |
if filename is not None:
|
|
|
|
| 159 |
plt.close()
|
| 160 |
return image
|
| 161 |
|
| 162 |
+
def plot_bboxes(subplot, bboxes, color="red", add_index=False):
    """Draw every bounding box as an unfilled rectangle on ``subplot``.

    Args:
        subplot: Axes-like object. It must provide ``add_patch`` and, when
            ``add_index`` is true, ``text``.
        bboxes: Iterable of boxes indexed as ``[x1, y1, x2, y2]``.
        color: Edge colour of the rectangles; also used for the index labels.
        add_index: When true, write each box's position within ``bboxes``
            at the centre of that box.

    NOTE(review): ``patches`` is expected to be available at module scope
    (presumably ``matplotlib.patches``) -- confirm against the file's imports.
    """
    # `index` rather than `id`, so the builtin `id` is not shadowed.
    for index, bbox in enumerate(bboxes):
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        rect = patches.Rectangle(
            bbox[:2], width, height, linewidth=1, edgecolor=color, facecolor="none", linestyle="solid"
        )
        subplot.add_patch(rect)
        if add_index:
            # Label is centred on the box.
            cx, cy = bbox[0] + width / 2, bbox[1] + height / 2
            subplot.text(cx, cy, str(index), color=color, fontsize=10, ha="center", va="center")
|
| 173 |
|
| 174 |
def sort_panels(rects):
|
| 175 |
before_rects = convert_to_list_of_lists(rects)
|
|
|
|
| 318 |
return merged_ranges
|
| 319 |
|
| 320 |
def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
    """Return the indices of ``text_bboxes`` ordered panel by panel.

    Each text box is assigned a panel id by ``get_text_to_panel_mapping``.
    Texts are grouped by that id in ascending order, and the order inside
    each group is whatever ``sort_texts_within_panel`` returns for the
    group's boxes.

    Args:
        text_bboxes: Text bounding boxes; passed through
            ``convert_to_list_of_lists`` before use.
        sorted_panel_bboxes: Panel bounding boxes; passed through
            ``convert_to_list_of_lists`` before use.
            # presumably already in reading order, as the name suggests --
            # verify against the caller.

    Returns:
        A list of indices into ``text_bboxes``; empty when there are no
        text boxes.
    """
    # Imported here so the function does not rely on a module-level import
    # of `groupby` being present.
    from itertools import groupby

    text_bboxes = convert_to_list_of_lists(text_bboxes)
    sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)

    if len(text_bboxes) == 0:
        return []

    panel_id_for_text = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)

    def panel_of(text_index):
        return panel_id_for_text[text_index]

    # sorted() is stable, so texts that share a panel keep their input order
    # before the per-panel sort is applied.
    indices_by_panel = sorted(range(len(text_bboxes)), key=panel_of)

    ordered_indices = []
    # groupby only merges consecutive equal keys, hence the sort above.
    for _, members in groupby(indices_by_panel, key=panel_of):
        members = list(members)
        boxes_in_panel = [text_bboxes[i] for i in members]
        # sort_texts_within_panel returns positions local to `boxes_in_panel`;
        # map them back to indices into the full `text_bboxes` list.
        local_order = sort_texts_within_panel(boxes_in_panel)
        ordered_indices.extend(members[k] for k in local_order)
    return ordered_indices
|
| 342 |
|
| 343 |
def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
|
| 344 |
text_to_panel_mapping = []
|
|
|
|
| 361 |
return text_to_panel_mapping
|
| 362 |
|
| 363 |
def sort_texts_within_panel(rects):
    """Order boxes by their distance from the top-left corner of their extent.

    The reference point is ``(min x1, min y1)`` taken over all boxes. Each
    box is turned into a polygon with ``box(x1, y1, x2, y2)`` and the boxes
    are sorted by ``reference_point.distance(polygon)``, nearest first.

    Args:
        rects: Sequence of boxes, each unpacking to ``(x1, y1, x2, y2)``.

    Returns:
        A list of positions into ``rects`` in sorted order. Boxes at equal
        distance keep their input order because ``sorted`` is stable. An
        empty input gives an empty list.

    NOTE(review): ``Point`` and ``box`` are expected at module scope
    (presumably from shapely) -- confirm against the file's imports.
    """
    # Nothing to sort; also avoids building a reference point at (inf, inf).
    if len(rects) == 0:
        return []

    smallest_x = float("inf")
    smallest_y = float("inf")
    for x1, y1, _, _ in rects:
        smallest_x = min(smallest_x, x1)
        smallest_y = min(smallest_y, y1)

    reference_point = Point(smallest_x, smallest_y)

    polygons_and_index = []
    for position, (x1, y1, x2, y2) in enumerate(rects):
        polygons_and_index.append((box(x1, y1, x2, y2), position))

    # Sort polygons by their distance to the reference point.
    polygons_and_index.sort(key=lambda pair: reference_point.distance(pair[0]))
    return [position for _, position in polygons_and_index]
|
| 382 |
|
| 383 |
def x1y1wh_to_x1y1x2y2(bbox):
|
| 384 |
x1, y1, w, h = bbox
|