Update utils.py
Browse files
utils.py
CHANGED
|
@@ -334,27 +334,44 @@ def merge_overlapping_ranges(ranges):
|
|
| 334 |
return merged_ranges
|
| 335 |
|
| 336 |
def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
text_bboxes = convert_to_list_of_lists(text_bboxes)
|
| 338 |
sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)
|
| 339 |
|
| 340 |
if len(text_bboxes) == 0:
|
| 341 |
return []
|
| 342 |
|
| 343 |
-
|
| 344 |
-
groups = groupby(range(len(nums)), key=lambda i: nums[i])
|
| 345 |
-
return [list(indices) for _, indices in groups]
|
| 346 |
-
|
| 347 |
panel_id_for_text = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
|
| 360 |
text_to_panel_mapping = []
|
|
|
|
| 334 |
return merged_ranges
|
| 335 |
|
| 336 |
def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
    """
    Return the indices of the text boxes arranged in reading order.

    Each text box is first assigned to a panel; boxes are then emitted panel
    by panel (ascending panel id), and inside a panel in the order decided by
    ``sort_texts_within_panel``.

    Args:
        text_bboxes: Text bounding boxes; passed through
            ``convert_to_list_of_lists`` before use.
            NOTE(review): presumably ``[x1, y1, x2, y2]`` per box — confirm.
        sorted_panel_bboxes: Panel bounding boxes; passed through
            ``convert_to_list_of_lists`` before use. Assumed by the caller
            contract to already be in reading order.

    Returns:
        A list of indices into ``text_bboxes``. An empty list is returned
        when there are no text boxes.
    """
    from itertools import groupby

    text_bboxes = convert_to_list_of_lists(text_bboxes)
    sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)

    # Nothing to order.
    if len(text_bboxes) == 0:
        return []

    # panel_of[i] is the panel id assigned to text box i.
    panel_of = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)

    def panel_key(idx):
        return panel_of[idx]

    # Stable sort: boxes sharing a panel keep their original relative order,
    # and equal panel ids become adjacent, which groupby requires.
    by_panel = sorted(range(len(text_bboxes)), key=panel_key)

    reading_order = []
    for _, members in groupby(by_panel, key=panel_key):
        members = list(members)
        # The helper returns positions relative to the list it was given,
        # so translate them back into indices of the full text box list.
        local_order = sort_texts_within_panel([text_bboxes[m] for m in members])
        reading_order += [members[pos] for pos in local_order]

    return reading_order
|
| 374 |
+
|
| 375 |
|
| 376 |
def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
|
| 377 |
text_to_panel_mapping = []
|