qqwjq1981 committed on
Commit
0bd8fa6
·
verified ·
1 Parent(s): 51254b6

Update utils/bubble_utils.py

Browse files
Files changed (1) hide show
  1. utils/bubble_utils.py +54 -29
utils/bubble_utils.py CHANGED
@@ -62,44 +62,52 @@ def visualize_all_debug(img, translations, bubble_polygons, step_name="debug", p
62
 
63
 
64
  # ===================== Main Bubble Translation Pipeline ===================
65
-
66
  def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
67
  """
68
  End-to-end bubble translation pipeline:
69
- 1. Global bubble detection
70
- 2. OCR text extraction
71
- 3. Correct OCR polygons using bubbles
72
- 4. Inpaint + redraw translated text
73
- 5. Split into chunks for UI
74
-
75
- Args:
76
- file_obj: Uploaded file object or path
77
- num_chunks: #chunks for UI display
78
- polygon_strategy: "hybrid", "bubble", "intersect", "expand"
79
- debug: if True, saves debug overlay PNGs
80
  """
81
- # 1) Load full page
 
 
82
  filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
83
  print(f"πŸ“„ bubble_pipeline_single: filename={filename}, size={full_img.size}")
84
 
 
85
  # 2) Robust bubble detection
 
86
  bubble_polygons = detect_speech_bubbles_robust(full_img, min_area=400)
87
- print(f"πŸ” Detected {len(bubble_polygons)} speech bubbles (robust)")
88
 
89
  if debug:
90
- visualize_all_debug(full_img, [], bubble_polygons, step_name="bubbles_only", prefix="bubble_dbg")
 
 
91
 
 
92
  # 3) OCR globally
 
93
  translations = extract_and_translate_chunk(full_img)
94
  print(f"πŸ“ OCR found {len(translations)} text regions")
95
 
96
  if len(translations) == 0:
97
- print("⚠️ OCR found no text β†’ fallback_empty")
98
  return fallback_empty(file_obj, num_chunks, full_img)
99
 
100
- # 4) Correct OCR polygons if bubbles found
 
 
 
 
 
 
 
101
  if len(bubble_polygons) > 0:
102
- print(f"✨ Correcting OCR polygons using bubbles (strategy={polygon_strategy})")
103
  translations = correct_ocr_polygons_with_bubbles(
104
  translations,
105
  bubble_polygons,
@@ -108,32 +116,48 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
108
  matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
109
  print(f"βœ… Matched {matched}/{len(translations)} OCR regions to bubbles")
110
  else:
111
- print("⚠️ No bubbles detected β†’ using original OCR polygons")
112
 
113
  if debug:
114
- visualize_all_debug(full_img, translations, bubble_polygons, step_name="after_correction", prefix="bubble_dbg")
 
 
115
 
116
- # 5) Render translated text on a copy of the full page
 
 
117
  translated_full = full_img.copy()
118
 
119
  for t in translations:
120
- polygon = t.get("polygon")
 
121
  translated_text = t.get("translated", "")
122
 
123
- if not polygon or not translated_text:
124
  continue
125
 
126
- render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)
 
 
 
 
 
 
 
127
 
128
  translated_full = draw_translated_text_convex(
129
  translated_full,
130
- render_poly,
131
- translated_text,
132
  font_path=FONT_PATH,
133
  font_scale=1.0,
 
 
134
  )
135
 
136
- # 6) Split original and translated images into chunks for UI
 
 
137
  if num_chunks > 1:
138
  _, _, chunks = load_and_split_image(file_obj, num_chunks)
139
  translated_chunks = split_image_into_chunks(translated_full, num_chunks)
@@ -141,7 +165,9 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
141
  chunks = [full_img]
142
  translated_chunks = [translated_full]
143
 
144
- # 7) Convert to HTML
 
 
145
  orig_html = "".join([encode_image_to_html(c) for c in chunks])
146
  trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
147
 
@@ -149,7 +175,6 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
149
 
150
  return filename, orig_html, trans_html, table_data, [translations]
151
 
152
-
153
  def split_image_into_chunks(img, num_chunks):
154
  """
155
  Simple vertical splitting for the translated image.
 
62
 
63
 
64
  # ===================== Main Bubble Translation Pipeline ===================
 
65
  def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
66
  """
67
  End-to-end bubble translation pipeline:
68
+ 1. Detect speech bubbles
69
+ 2. OCR full page
70
+ 3. Correct OCR polygons using bubble polygons
71
+ 4. Render translated text using corrected polygons
72
+ 5. Split into chunks
 
 
 
 
 
 
73
  """
74
+ # -------------------------------------------------------
75
+ # 1) Load image
76
+ # -------------------------------------------------------
77
  filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
78
  print(f"πŸ“„ bubble_pipeline_single: filename={filename}, size={full_img.size}")
79
 
80
+ # -------------------------------------------------------
81
  # 2) Robust bubble detection
82
+ # -------------------------------------------------------
83
  bubble_polygons = detect_speech_bubbles_robust(full_img, min_area=400)
84
+ print(f"πŸ” Detected {len(bubble_polygons)} speech bubbles")
85
 
86
  if debug:
87
+ visualize_all_debug(full_img, [], bubble_polygons,
88
+ step_name="bubbles_only",
89
+ prefix="bubble_dbg")
90
 
91
+ # -------------------------------------------------------
92
  # 3) OCR globally
93
+ # -------------------------------------------------------
94
  translations = extract_and_translate_chunk(full_img)
95
  print(f"πŸ“ OCR found {len(translations)} text regions")
96
 
97
  if len(translations) == 0:
98
+ print("⚠️ No OCR text detected β†’ fallback")
99
  return fallback_empty(file_obj, num_chunks, full_img)
100
 
101
+ # SAVE ORIGINAL POLYGONS for debugging
102
+ for t in translations:
103
+ if "polygon" in t:
104
+ t["original_polygon"] = t["polygon"]
105
+
106
+ # -------------------------------------------------------
107
+ # 4) Correct OCR polygons using bubble polygons
108
+ # -------------------------------------------------------
109
  if len(bubble_polygons) > 0:
110
+ print(f"✨ Correcting OCR polygons using bubble strategy: {polygon_strategy}")
111
  translations = correct_ocr_polygons_with_bubbles(
112
  translations,
113
  bubble_polygons,
 
116
  matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
117
  print(f"βœ… Matched {matched}/{len(translations)} OCR regions to bubbles")
118
  else:
119
+ print("⚠️ No bubble polygons detected β†’ skipping polygon correction")
120
 
121
  if debug:
122
+ visualize_all_debug(full_img, translations, bubble_polygons,
123
+ step_name="after_correction",
124
+ prefix="bubble_dbg")
125
 
126
+ # -------------------------------------------------------
127
+ # 5) Render translated text
128
+ # -------------------------------------------------------
129
  translated_full = full_img.copy()
130
 
131
  for t in translations:
132
+ corrected_poly = t.get("polygon")
133
+ original_poly = t.get("original_polygon")
134
  translated_text = t.get("translated", "")
135
 
136
+ if not corrected_poly or not translated_text:
137
  continue
138
 
139
+ # Get bubble polygon (if matched)
140
+ bubble_poly = None
141
+ idx = t.get("matched_bubble_idx")
142
+ if idx is not None and 0 <= idx < len(bubble_polygons):
143
+ bubble_poly = bubble_polygons[idx]
144
+
145
+ # Render polygon is slightly shrunk
146
+ render_poly = shrink_or_expand_polygon(corrected_poly, shrink_ratio=0.92)
147
 
148
  translated_full = draw_translated_text_convex(
149
  translated_full,
150
+ polygon_coords=corrected_poly, # corrected
151
+ text=translated_text,
152
  font_path=FONT_PATH,
153
  font_scale=1.0,
154
+ original_polygon=original_poly, # RED
155
+ bubble_polygon=bubble_poly # BLUE
156
  )
157
 
158
+ # -------------------------------------------------------
159
+ # 6) Split for UI
160
+ # -------------------------------------------------------
161
  if num_chunks > 1:
162
  _, _, chunks = load_and_split_image(file_obj, num_chunks)
163
  translated_chunks = split_image_into_chunks(translated_full, num_chunks)
 
165
  chunks = [full_img]
166
  translated_chunks = [translated_full]
167
 
168
+ # -------------------------------------------------------
169
+ # 7) Return output
170
+ # -------------------------------------------------------
171
  orig_html = "".join([encode_image_to_html(c) for c in chunks])
172
  trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
173
 
 
175
 
176
  return filename, orig_html, trans_html, table_data, [translations]
177
 
 
178
  def split_image_into_chunks(img, num_chunks):
179
  """
180
  Simple vertical splitting for the translated image.