NicFromLM committed on
Commit
13dd444
·
verified ·
1 Parent(s): 0da4795

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +43 -62
utils.py CHANGED
@@ -90,20 +90,15 @@ class UnionFind:
90
  return labels
91
 
92
  def visualise_single_image_prediction(image_as_np_array, predictions, filename):
93
- figure, subplot = plt.subplots(1, 1, figsize=(10, 10))
 
 
 
 
94
  subplot.imshow(image_as_np_array)
95
  plot_bboxes(subplot, predictions["panels"], color="green")
96
- plot_bboxes(subplot, predictions["texts"], color="red", visibility=predictions["is_essential_text"])
97
  plot_bboxes(subplot, predictions["characters"], color="blue")
98
- plot_bboxes(subplot, predictions["tails"], color="purple")
99
-
100
- for i, name in enumerate(predictions["character_names"]):
101
- char_bbox = predictions["characters"][i]
102
- x1, y1, x2, y2 = char_bbox
103
- subplot.text(x1, y1 - 2, name,
104
- verticalalignment='bottom', horizontalalignment='left',
105
- bbox=dict(facecolor='blue', alpha=1, edgecolor='none'), # Background settings
106
- color='white', fontsize=8)
107
 
108
  COLOURS = [
109
  "#b7ff51", # green
@@ -146,24 +141,14 @@ def visualise_single_image_prediction(image_as_np_array, predictions, filename):
146
  subplot.plot([x2], [y2], color=random_colour, marker="o", markersize=5)
147
 
148
  for (i, j) in predictions["text_character_associations"]:
 
149
  bbox_i = predictions["texts"][i]
150
  bbox_j = predictions["characters"][j]
151
- if not predictions["is_essential_text"][i]:
152
- continue
153
- x1 = bbox_i[0] + (bbox_i[2] - bbox_i[0]) / 2
154
- y1 = bbox_i[1] + (bbox_i[3] - bbox_i[1]) / 2
155
- x2 = bbox_j[0] + (bbox_j[2] - bbox_j[0]) / 2
156
- y2 = bbox_j[1] + (bbox_j[3] - bbox_j[1]) / 2
157
- subplot.plot([x1, x2], [y1, y2], color="red", linewidth=2, linestyle="dashed")
158
-
159
- for (i, j) in predictions["text_tail_associations"]:
160
- bbox_i = predictions["texts"][i]
161
- bbox_j = predictions["tails"][j]
162
  x1 = bbox_i[0] + (bbox_i[2] - bbox_i[0]) / 2
163
  y1 = bbox_i[1] + (bbox_i[3] - bbox_i[1]) / 2
164
  x2 = bbox_j[0] + (bbox_j[2] - bbox_j[0]) / 2
165
  y2 = bbox_j[1] + (bbox_j[3] - bbox_j[1]) / 2
166
- subplot.plot([x1, x2], [y1, y2], color="purple", linewidth=2, linestyle="dashed")
167
 
168
  subplot.axis("off")
169
  if filename is not None:
@@ -174,18 +159,17 @@ def visualise_single_image_prediction(image_as_np_array, predictions, filename):
174
  plt.close()
175
  return image
176
 
177
- def plot_bboxes(subplot, bboxes, color="red", visibility=None):
178
- if visibility is None:
179
- visibility = [1] * len(bboxes)
180
  for id, bbox in enumerate(bboxes):
181
- if visibility[id] == 0:
182
- continue
183
  w = bbox[2] - bbox[0]
184
  h = bbox[3] - bbox[1]
185
  rect = patches.Rectangle(
186
  bbox[:2], w, h, linewidth=1, edgecolor=color, facecolor="none", linestyle="solid"
187
  )
188
  subplot.add_patch(rect)
 
 
 
189
 
190
  def sort_panels(rects):
191
  before_rects = convert_to_list_of_lists(rects)
@@ -334,44 +318,27 @@ def merge_overlapping_ranges(ranges):
334
  return merged_ranges
335
 
336
  def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
337
- """
338
- Sort text boxes in reading order (left-to-right, top-to-bottom) within panels.
339
-
340
- Args:
341
- text_bboxes: List of text bounding boxes [x1, y1, x2, y2].
342
- sorted_panel_bboxes: List of panel bounding boxes, assumed to be sorted in reading order.
343
-
344
- Returns:
345
- List of indices of text boxes sorted in reading order.
346
- """
347
  text_bboxes = convert_to_list_of_lists(text_bboxes)
348
  sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)
349
 
350
  if len(text_bboxes) == 0:
351
  return []
352
 
353
- # Step 1: Map each text box to its panel
354
- panel_id_for_text = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)
355
-
356
- # Step 2: Sort text boxes first by their panel and then within each panel
357
- text_indices = list(range(len(text_bboxes)))
358
- # Sort by panel ID first
359
- text_indices.sort(key=lambda i: panel_id_for_text[i])
360
-
361
- # Step 3: Group by panel ID and sort within each panel
362
- from itertools import groupby
363
-
364
- sorted_indices = []
365
- for panel_id, group in groupby(text_indices, key=lambda i: panel_id_for_text[i]):
366
- group_indices = list(group)
367
- text_bboxes_within_panel = [text_bboxes[i] for i in group_indices]
368
-
369
- # Sort within the panel (left-to-right, top-to-bottom)
370
- sorted_within_panel = sort_texts_within_panel(text_bboxes_within_panel)
371
- sorted_indices.extend([group_indices[i] for i in sorted_within_panel])
372
-
373
- return sorted_indices
374
 
 
 
 
 
 
 
 
 
 
 
 
375
 
376
  def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
377
  text_to_panel_mapping = []
@@ -394,10 +361,24 @@ def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
394
  return text_to_panel_mapping
395
 
396
  def sort_texts_within_panel(rects):
397
- # Sort primarily by Y-coordinate (top-to-bottom), then X-coordinate (left-to-right)
398
- sorted_indices = sorted(range(len(rects)), key=lambda i: (rects[i][1], rects[i][0]))
399
- return sorted_indices
 
 
 
 
 
400
 
 
 
 
 
 
 
 
 
 
401
 
402
  def x1y1wh_to_x1y1x2y2(bbox):
403
  x1, y1, w, h = bbox
 
90
  return labels
91
 
92
  def visualise_single_image_prediction(image_as_np_array, predictions, filename):
93
+ h, w = image_as_np_array.shape[:2]
94
+ if h > w:
95
+ figure, subplot = plt.subplots(1, 1, figsize=(10, 10 * h / w))
96
+ else:
97
+ figure, subplot = plt.subplots(1, 1, figsize=(10 * w / h, 10))
98
  subplot.imshow(image_as_np_array)
99
  plot_bboxes(subplot, predictions["panels"], color="green")
100
+ plot_bboxes(subplot, predictions["texts"], color="red", add_index=True)
101
  plot_bboxes(subplot, predictions["characters"], color="blue")
 
 
 
 
 
 
 
 
 
102
 
103
  COLOURS = [
104
  "#b7ff51", # green
 
141
  subplot.plot([x2], [y2], color=random_colour, marker="o", markersize=5)
142
 
143
  for (i, j) in predictions["text_character_associations"]:
144
+ score = predictions["dialog_confidences"][i]
145
  bbox_i = predictions["texts"][i]
146
  bbox_j = predictions["characters"][j]
 
 
 
 
 
 
 
 
 
 
 
147
  x1 = bbox_i[0] + (bbox_i[2] - bbox_i[0]) / 2
148
  y1 = bbox_i[1] + (bbox_i[3] - bbox_i[1]) / 2
149
  x2 = bbox_j[0] + (bbox_j[2] - bbox_j[0]) / 2
150
  y2 = bbox_j[1] + (bbox_j[3] - bbox_j[1]) / 2
151
+ subplot.plot([x1, x2], [y1, y2], color="red", linewidth=2, linestyle="dashed", alpha=score)
152
 
153
  subplot.axis("off")
154
  if filename is not None:
 
159
  plt.close()
160
  return image
161
 
162
def plot_bboxes(subplot, bboxes, color="red", add_index=False):
    """Outline every box in ``bboxes`` on ``subplot``, optionally numbering them.

    Args:
        subplot: Drawing target exposing ``add_patch`` and ``text``
            (presumably a matplotlib Axes -- confirm against callers).
        bboxes: Sequence of boxes indexed as ``[x1, y1, x2, y2]``.
        color: Edge colour of each rectangle; also used for the index label.
        add_index: When true, the position of each box within ``bboxes`` is
            written at the centre of that box.
    """
    for index, bbox in enumerate(bboxes):
        left, top = bbox[0], bbox[1]
        width = bbox[2] - left
        height = bbox[3] - top
        outline = patches.Rectangle(
            bbox[:2],
            width,
            height,
            linewidth=1,
            edgecolor=color,
            facecolor="none",
            linestyle="solid",
        )
        subplot.add_patch(outline)
        if not add_index:
            continue
        # Label sits at the geometric centre of the rectangle just drawn.
        centre_x = left + width / 2
        centre_y = top + height / 2
        subplot.text(centre_x, centre_y, str(index), color=color, fontsize=10, ha="center", va="center")
173
 
174
  def sort_panels(rects):
175
  before_rects = convert_to_list_of_lists(rects)
 
318
  return merged_ranges
319
 
320
  def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
 
 
 
 
 
 
 
 
 
 
321
  text_bboxes = convert_to_list_of_lists(text_bboxes)
322
  sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)
323
 
324
  if len(text_bboxes) == 0:
325
  return []
326
 
327
+ def indices_of_same_elements(nums):
328
+ groups = groupby(range(len(nums)), key=lambda i: nums[i])
329
+ return [list(indices) for _, indices in groups]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
+ panel_id_for_text = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)
332
+ indices_of_texts = list(range(len(text_bboxes)))
333
+ indices_of_texts, panel_id_for_text = zip(*sorted(zip(indices_of_texts, panel_id_for_text), key=lambda x: x[1]))
334
+ indices_of_texts = list(indices_of_texts)
335
+ grouped_indices = indices_of_same_elements(panel_id_for_text)
336
+ for group in grouped_indices:
337
+ subset_of_text_indices = [indices_of_texts[i] for i in group]
338
+ text_bboxes_of_subset = [text_bboxes[i] for i in subset_of_text_indices]
339
+ sorted_subset_indices = sort_texts_within_panel(text_bboxes_of_subset)
340
+ indices_of_texts[group[0] : group[-1] + 1] = [subset_of_text_indices[i] for i in sorted_subset_indices]
341
+ return indices_of_texts
342
 
343
  def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
344
  text_to_panel_mapping = []
 
361
  return text_to_panel_mapping
362
 
363
  def sort_texts_within_panel(rects):
364
+ smallest_y = float("inf")
365
+ smallest_x = float("inf")
366
+ for i, rect in enumerate(rects):
367
+ x1, y1, x2, y2 = rect
368
+ smallest_y = min(smallest_y, y1)
369
+ smallest_x = min(smallest_x, x1)
370
+
371
+ reference_point = Point(smallest_x, smallest_y)
372
 
373
+ polygons_and_index = []
374
+ for i, rect in enumerate(rects):
375
+ x1, y1, x2, y2 = rect
376
+ polygons_and_index.append((box(x1,y1,x2,y2), i))
377
+
378
+ # sort points by distance to this new reference point
379
+ polygons_and_index = sorted(polygons_and_index, key=lambda x: reference_point.distance(x[0]))
380
+ indices = [x[1] for x in polygons_and_index]
381
+ return indices
382
 
383
  def x1y1wh_to_x1y1x2y2(bbox):
384
  x1, y1, w, h = bbox