NicFromLM committed on
Commit
0da4795
·
verified ·
1 Parent(s): b4a071d

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +31 -14
utils.py CHANGED
@@ -334,27 +334,44 @@ def merge_overlapping_ranges(ranges):
334
  return merged_ranges
335
 
336
  def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
 
 
 
 
 
 
 
 
 
 
337
  text_bboxes = convert_to_list_of_lists(text_bboxes)
338
  sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)
339
 
340
  if len(text_bboxes) == 0:
341
  return []
342
 
343
- def indices_of_same_elements(nums):
344
- groups = groupby(range(len(nums)), key=lambda i: nums[i])
345
- return [list(indices) for _, indices in groups]
346
-
347
  panel_id_for_text = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)
348
- indices_of_texts = list(range(len(text_bboxes)))
349
- indices_of_texts, panel_id_for_text = zip(*sorted(zip(indices_of_texts, panel_id_for_text), key=lambda x: x[1]))
350
- indices_of_texts = list(indices_of_texts)
351
- grouped_indices = indices_of_same_elements(panel_id_for_text)
352
- for group in grouped_indices:
353
- subset_of_text_indices = [indices_of_texts[i] for i in group]
354
- text_bboxes_of_subset = [text_bboxes[i] for i in subset_of_text_indices]
355
- sorted_subset_indices = sort_texts_within_panel(text_bboxes_of_subset)
356
- indices_of_texts[group[0] : group[-1] + 1] = [subset_of_text_indices[i] for i in sorted_subset_indices]
357
- return indices_of_texts
 
 
 
 
 
 
 
 
 
 
358
 
359
  def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
360
  text_to_panel_mapping = []
 
334
  return merged_ranges
335
 
336
  def sort_text_boxes_in_reading_order(text_bboxes, sorted_panel_bboxes):
337
+ """
338
+ Sort text boxes in reading order (left-to-right, top-to-bottom) within panels.
339
+
340
+ Args:
341
+ text_bboxes: List of text bounding boxes [x1, y1, x2, y2].
342
+ sorted_panel_bboxes: List of panel bounding boxes, assumed to be sorted in reading order.
343
+
344
+ Returns:
345
+ List of indices of text boxes sorted in reading order.
346
+ """
347
  text_bboxes = convert_to_list_of_lists(text_bboxes)
348
  sorted_panel_bboxes = convert_to_list_of_lists(sorted_panel_bboxes)
349
 
350
  if len(text_bboxes) == 0:
351
  return []
352
 
353
+ # Step 1: Map each text box to its panel
 
 
 
354
  panel_id_for_text = get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes)
355
+
356
+ # Step 2: Sort text boxes first by their panel and then within each panel
357
+ text_indices = list(range(len(text_bboxes)))
358
+ # Sort by panel ID first
359
+ text_indices.sort(key=lambda i: panel_id_for_text[i])
360
+
361
+ # Step 3: Group by panel ID and sort within each panel
362
+ from itertools import groupby
363
+
364
+ sorted_indices = []
365
+ for panel_id, group in groupby(text_indices, key=lambda i: panel_id_for_text[i]):
366
+ group_indices = list(group)
367
+ text_bboxes_within_panel = [text_bboxes[i] for i in group_indices]
368
+
369
+ # Sort within the panel (left-to-right, top-to-bottom)
370
+ sorted_within_panel = sort_texts_within_panel(text_bboxes_within_panel)
371
+ sorted_indices.extend([group_indices[i] for i in sorted_within_panel])
372
+
373
+ return sorted_indices
374
+
375
 
376
  def get_text_to_panel_mapping(text_bboxes, sorted_panel_bboxes):
377
  text_to_panel_mapping = []