qqwjq1981 committed on
Commit
11cf5cb
·
verified ·
1 Parent(s): c9c0e3f

Update utils/bubble_utils.py

Browse files
Files changed (1) hide show
  1. utils/bubble_utils.py +43 -70
utils/bubble_utils.py CHANGED
@@ -15,7 +15,7 @@ from utils.polygon_utils import (
15
  )
16
  from utils.bubble_detect import detect_speech_bubbles_robust
17
  from utils.u2net_detector import detect_bubbles_u2net
18
- from utils.bubble_detect_rtdetr import detect_bubbles_rtdetr
19
 
20
  def bbox_to_polygon(bbox):
21
  """
@@ -78,124 +78,97 @@ def visualize_all_debug(img, translations, bubble_polygons, step_name="debug", p
78
  # ===================== Main Bubble Translation Pipeline ===================
79
  def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
80
  """
81
- End-to-end bubble translation pipeline:
82
- 1. Detect speech bubbles
83
- 2. OCR full page
84
- 3. Correct OCR polygons using bubble polygons
85
- 4. Render translated text using corrected polygons
86
- 5. Split into chunks
87
  """
 
88
  # -------------------------------------------------------
89
- # 1) Load image
90
  # -------------------------------------------------------
91
  filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
92
- print(f"πŸ“„ bubble_pipeline_single: filename={filename}, size={full_img.size}")
93
 
94
  # -------------------------------------------------------
95
- # 2) Bubble detection (RT-DETR comic model)
96
  # -------------------------------------------------------
97
- detections = detect_bubbles_rtdetr(full_img)
98
-
99
- bubble_boxes = [d["bbox"] for d in detections if d["class"] == 0] # pure bubble shapes
100
- bubble_text_boxes = [d["bbox"] for d in detections if d["class"] == 1] # text inside bubble
101
- free_text_boxes = [d["bbox"] for d in detections if d["class"] == 2] # text outside bubble
102
 
103
- # Convert bubble boxes β†’ polygons for your polygon-based pipeline
104
- bubble_polygons = [bbox_to_polygon(b) for b in bubble_boxes]
105
-
106
- print(f"πŸ” RT-DETR: {len(bubble_polygons)} bubbles, "
107
- f"{len(bubble_text_boxes)} bubble-text regions, "
108
- f"{len(free_text_boxes)} free-text regions")
109
-
110
 
111
  if debug:
112
- visualize_all_debug(full_img, [], bubble_polygons,
113
- step_name="bubbles_only",
114
- prefix="bubble_dbg")
 
115
 
116
  # -------------------------------------------------------
117
- # 3) OCR globally
118
  # -------------------------------------------------------
119
  translations = extract_and_translate_chunk(full_img)
120
- print(f"πŸ“ OCR found {len(translations)} text regions")
121
 
122
- if len(translations) == 0:
123
- print("⚠️ No OCR text detected β†’ fallback")
124
  return fallback_empty(file_obj, num_chunks, full_img)
125
 
126
- # SAVE ORIGINAL POLYGONS for debugging
127
  for t in translations:
128
- if "polygon" in t:
129
- t["original_polygon"] = t["polygon"]
130
 
131
  # -------------------------------------------------------
132
- # 4) Correct OCR polygons using bubble polygons
133
  # -------------------------------------------------------
134
- if len(bubble_polygons) > 0:
135
- print(f"✨ Correcting OCR polygons using bubble strategy: {polygon_strategy}")
 
136
  translations = correct_ocr_polygons_with_bubbles(
137
- translations,
138
- bubble_polygons,
139
- strategy=polygon_strategy,
140
  )
 
141
  matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
142
- print(f"βœ… Matched {matched}/{len(translations)} OCR regions to bubbles")
143
- else:
144
- print("⚠️ No bubble polygons detected β†’ skipping polygon correction")
145
 
146
  if debug:
147
- visualize_all_debug(full_img, translations, bubble_polygons,
148
- step_name="after_correction",
149
- prefix="bubble_dbg")
 
150
 
151
  # -------------------------------------------------------
152
- # 5) Render translated text
153
  # -------------------------------------------------------
154
  translated_full = full_img.copy()
155
 
156
  for t in translations:
157
- corrected_poly = t.get("polygon")
158
- original_poly = t.get("original_polygon")
159
  translated_text = t.get("translated", "")
160
-
161
- if not corrected_poly or not translated_text:
162
  continue
163
 
164
- # Get bubble polygon (if matched)
165
- bubble_poly = None
166
- idx = t.get("matched_bubble_idx")
167
- if idx is not None and 0 <= idx < len(bubble_polygons):
168
- bubble_poly = bubble_polygons[idx]
169
-
170
- # Render polygon is slightly shrunk
171
- render_poly = shrink_or_expand_polygon(corrected_poly, shrink_ratio=0.92)
172
 
173
  translated_full = draw_translated_text_convex(
174
  translated_full,
175
- polygon_coords=corrected_poly, # corrected
176
  text=translated_text,
177
  font_path=FONT_PATH,
178
  font_scale=1.0,
179
- original_polygon=original_poly, # RED
180
- bubble_polygon=bubble_poly # BLUE
181
  )
182
 
183
  # -------------------------------------------------------
184
  # 6) Split for UI
185
  # -------------------------------------------------------
186
  if num_chunks > 1:
187
- _, _, chunks = load_and_split_image(file_obj, num_chunks)
188
- translated_chunks = split_image_into_chunks(translated_full, num_chunks)
189
  else:
190
- chunks = [full_img]
191
- translated_chunks = [translated_full]
192
-
193
- # -------------------------------------------------------
194
- # 7) Return output
195
- # -------------------------------------------------------
196
- orig_html = "".join([encode_image_to_html(c) for c in chunks])
197
- trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
198
 
 
 
199
  table_data = [[t["original"], t["translated"]] for t in translations]
200
 
201
  return filename, orig_html, trans_html, table_data, [translations]
 
15
  )
16
  from utils.bubble_detect import detect_speech_bubbles_robust
17
  from utils.u2net_detector import detect_bubbles_u2net
18
+ from utils.bubble_detect_rtdetr import detect_and_refine_bubbles
19
 
20
  def bbox_to_polygon(bbox):
21
  """
 
78
  # ===================== Main Bubble Translation Pipeline ===================
79
  def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
80
  """
81
+ Manga bubble-aware translation pipeline using RT-DETR bubble detection.
 
 
 
 
 
82
  """
83
+
84
  # -------------------------------------------------------
85
+ # 1) Load full image
86
  # -------------------------------------------------------
87
  filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
88
+ print(f"πŸ“„ bubble_pipeline_single: {filename}, size={full_img.size}")
89
 
90
  # -------------------------------------------------------
91
+ # 2) Detect & refine bubbles with RT-DETR
92
  # -------------------------------------------------------
 
 
 
 
 
93
 
94
+ bubble_polygons, interior_polygons = detect_and_refine_bubbles(full_img)
 
 
 
 
 
 
95
 
96
  if debug:
97
+ visualize_all_debug(
98
+ full_img, [], bubble_polygons,
99
+ step_name="bubbles_only", prefix="bubble_dbg"
100
+ )
101
 
102
  # -------------------------------------------------------
103
+ # 3) OCR full-page
104
  # -------------------------------------------------------
105
  translations = extract_and_translate_chunk(full_img)
106
+ print(f"πŸ“ OCR found {len(translations)} regions")
107
 
108
+ if not translations:
 
109
  return fallback_empty(file_obj, num_chunks, full_img)
110
 
111
+ # save original polygon for visualization
112
  for t in translations:
113
+ t["original_polygon"] = t.get("polygon")
 
114
 
115
  # -------------------------------------------------------
116
+ # 4) Match OCR text regions with refined bubble polygons
117
  # -------------------------------------------------------
118
+ if bubble_polygons:
119
+ print("✨ Correcting OCR polygons using refined bubbles...")
120
+
121
  translations = correct_ocr_polygons_with_bubbles(
122
+ translations, bubble_polygons, strategy=polygon_strategy
 
 
123
  )
124
+
125
  matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
126
+ print(f"βœ… Polygons matched to bubbles: {matched}/{len(translations)}")
 
 
127
 
128
  if debug:
129
+ visualize_all_debug(
130
+ full_img, translations, bubble_polygons,
131
+ step_name="after_correction", prefix="bubble_dbg"
132
+ )
133
 
134
  # -------------------------------------------------------
135
+ # 5) Render translated text using INTERIOR polygons
136
  # -------------------------------------------------------
137
  translated_full = full_img.copy()
138
 
139
  for t in translations:
140
+ idx = t.get("matched_bubble_idx")
 
141
  translated_text = t.get("translated", "")
142
+ if not translated_text:
 
143
  continue
144
 
145
+ if idx is not None and idx < len(interior_polygons):
146
+ render_poly = interior_polygons[idx] # βœ” refined interior polygon
147
+ else:
148
+ render_poly = shrink_or_expand_polygon(t["polygon"], shrink_ratio=0.92)
 
 
 
 
149
 
150
  translated_full = draw_translated_text_convex(
151
  translated_full,
152
+ polygon_coords=render_poly,
153
  text=translated_text,
154
  font_path=FONT_PATH,
155
  font_scale=1.0,
156
+ original_polygon=t.get("original_polygon"),
157
+ bubble_polygon=bubble_polygons[idx] if idx is not None else None
158
  )
159
 
160
  # -------------------------------------------------------
161
  # 6) Split for UI
162
  # -------------------------------------------------------
163
  if num_chunks > 1:
164
+ _, _, orig_chunks = load_and_split_image(file_obj, num_chunks)
165
+ trans_chunks = split_image_into_chunks(translated_full, num_chunks)
166
  else:
167
+ orig_chunks = [full_img]
168
+ trans_chunks = [translated_full]
 
 
 
 
 
 
169
 
170
+ orig_html = "".join([encode_image_to_html(c) for c in orig_chunks])
171
+ trans_html = "".join([encode_image_to_html(c) for c in trans_chunks])
172
  table_data = [[t["original"], t["translated"]] for t in translations]
173
 
174
  return filename, orig_html, trans_html, table_data, [translations]