qqwjq1981 committed on
Commit
03501f8
·
verified ·
1 Parent(s): 5902acb

Update utils/bubble_utils.py

Browse files
Files changed (1) hide show
  1. utils/bubble_utils.py +58 -62
utils/bubble_utils.py CHANGED
@@ -177,129 +177,124 @@ def visualize_all_debug(
177
 
178
  return out_path
179
 
180
- # ===================== Main Bubble Translation Pipeline ===================
 
181
  # ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
182
  def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
183
  """
184
  Manga bubble-aware translation pipeline (CHUNK-BASED):
185
- - Split image into chunks vertically
186
  - For each chunk:
187
- bubble detection (RT-DETR)
188
- bubble refinement
189
- mask-based OCR
190
- bubble-aware polygon correction
191
- render translated text
192
- - Merge translated chunks into final output
193
-
194
- Returns same format as other pipelines.
 
 
195
  """
196
 
197
  debug_files = []
198
 
199
  # -------------------------------------------------------
200
- # 1. Load & split image into chunks
201
  # -------------------------------------------------------
202
  filename, full_img, chunks = load_and_split_image(file_obj, num_chunks)
203
  print(f"πŸ“„ bubble_pipeline_single: {filename}, full size={full_img.size}, chunks={len(chunks)}")
204
 
205
- all_translations = []
206
- all_tables = []
207
- translated_chunks = []
208
-
209
- y_offset = 0 # track position of each chunk in the full image
210
 
211
  # -------------------------------------------------------
212
  # 2. Process each chunk independently
213
  # -------------------------------------------------------
214
  for ci, chunk in enumerate(chunks):
215
  print(f"\n================ CHUNK {ci} ================")
216
- chunk_h = chunk.size[1]
 
217
 
218
- # ---- A) Detect & Refine Bubbles in This Chunk ----
219
  bubble_polygons, interior_polygons, bubble_boxes = detect_and_refine_bubbles(chunk)
220
  print(f"πŸ” Chunk {ci}: found {len(bubble_polygons)} bubble polygons")
221
 
222
- # Add chunk offset to polygons & boxes
223
- bubble_polygons = [ [(x, y + y_offset) for (x, y) in poly] for poly in bubble_polygons ]
224
- interior_polygons = [ [(x, y + y_offset) for (x, y) in poly] for poly in interior_polygons ]
225
- bubble_boxes = [ (x1, y1 + y_offset, x2, y2 + y_offset) for (x1,y1,x2,y2) in bubble_boxes ]
226
-
227
- # ---- Debug plot: Bubble shapes only ----
228
  if debug:
229
- debug_img = full_img.copy()
230
- p = visualize_all_debug(
231
- debug_img, [], bubble_polygons, bubble_boxes=bubble_boxes,
232
  step_name=f"chunk{ci}_bubbles", prefix="bubble_dbg"
233
  )
234
- debug_files.append(p)
235
 
236
- # ---- B) Mask-based OCR inside bubbles ----
237
- print(f"πŸ“ Chunk {ci}: masked OCR...")
238
- translations = extract_and_translate_with_masks(full_img, normalize_bubble_regions(bubble_boxes))
239
 
 
240
  if not translations:
241
- print(f"⚠️ Chunk {ci}: falling back to full OCR")
242
  translations = extract_and_translate_chunk(chunk)
243
 
244
- # add back offset for OCR polygons
245
- for t in translations:
246
- if t["polygon"]:
247
- t["polygon"] = [(x, y + y_offset) for (x, y) in t["polygon"]]
248
-
249
  print(f"⬆️ Chunk {ci}: OCR detections = {len(translations)}")
250
 
251
- # Record original polygons
252
  for t in translations:
253
  t["original_polygon"] = t.get("polygon")
254
 
255
- # ---- C) Bubble-aware polygon matching ----
256
- translations = match_translations_to_bubbles(translations, bubble_polygons)
 
 
 
 
257
 
258
- # Debug: after-correction polygons
259
  if debug:
260
- debug_img2 = full_img.copy()
261
- p = visualize_all_debug(
262
- debug_img2, translations, bubble_polygons, bubble_boxes=bubble_boxes,
263
  step_name=f"chunk{ci}_after_correction", prefix="bubble_dbg"
264
  )
265
- debug_files.append(p)
266
 
267
- # ---- D) Render translated text inside bubble areas ----
268
- translated_chunk_img = full_img.copy()
269
 
270
  for t in translations:
271
- text = t.get("translated")
272
- if not text:
273
  continue
274
 
275
  bidx = t.get("matched_bubble_idx")
276
 
277
- if bidx is not None and bidx < len(bubble_polygons):
 
278
  render_poly = bubble_polygons[bidx]
279
  else:
280
- render_poly = t["polygon"]
 
 
 
 
281
 
282
  translated_chunk_img = draw_translated_text_convex(
283
  translated_chunk_img,
284
  polygon_coords=render_poly,
285
- text=text,
286
  font_path=FONT_PATH,
287
  font_scale=1.0,
288
  original_polygon=t.get("original_polygon"),
289
- bubble_polygon=bubble_polygons[bidx] if bidx is not None else None
290
  )
291
 
292
- # Save translations
293
- all_translations.extend(translations)
294
-
295
- # Save to table
296
- for t in translations:
297
- all_tables.append([t["original"], t["translated"]])
298
-
299
  translated_chunks.append(translated_chunk_img)
300
 
301
- # Move offset down to next chunk
302
- y_offset += chunk_h
 
303
 
304
  # -------------------------------------------------------
305
  # 3. Assemble HTML output (original & translated)
@@ -307,6 +302,7 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
307
  orig_html = "".join([encode_image_to_html(c) for c in chunks])
308
  trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
309
 
 
310
  return filename, orig_html, trans_html, all_tables, [all_translations], debug_files
311
 
312
  def split_image_into_chunks(img, num_chunks):
 
177
 
178
  return out_path
179
 
180
+
181
+ # ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
182
  # ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
183
  def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
184
  """
185
  Manga bubble-aware translation pipeline (CHUNK-BASED):
186
+ - Split image into vertical chunks
187
  - For each chunk:
188
+ * RT-DETR bubble detection
189
+ * Bubble shape refinement (outer + inner polygons)
190
+ * Mask-based OCR inside bubble interiors
191
+ * Bubble-aware polygon matching
192
+ * Render translated text inside bubble regions
193
+ - Return concatenated HTML previews + table + debug files
194
+
195
+ NOTE:
196
+ * All polygons are kept in CHUNK-LOCAL coordinates.
197
+ * Manual-edit pipeline isn't wired for Bubble mode, so this is fine.
198
  """
199
 
200
  debug_files = []
201
 
202
  # -------------------------------------------------------
203
+ # 1. Load & split image into chunks (vertical)
204
  # -------------------------------------------------------
205
  filename, full_img, chunks = load_and_split_image(file_obj, num_chunks)
206
  print(f"πŸ“„ bubble_pipeline_single: {filename}, full size={full_img.size}, chunks={len(chunks)}")
207
 
208
+ all_translations = [] # flattened list across chunks
209
+ all_tables = [] # [["original", "translated"], ...]
210
+ translated_chunks = [] # list of PIL images (per chunk)
 
 
211
 
212
  # -------------------------------------------------------
213
  # 2. Process each chunk independently
214
  # -------------------------------------------------------
215
  for ci, chunk in enumerate(chunks):
216
  print(f"\n================ CHUNK {ci} ================")
217
+ cw, ch = chunk.size
218
+ print(f" Chunk size: {cw}x{ch}")
219
 
220
+ # ---- A) Detect & refine bubbles in THIS chunk (local coords) ----
221
  bubble_polygons, interior_polygons, bubble_boxes = detect_and_refine_bubbles(chunk)
222
  print(f"πŸ” Chunk {ci}: found {len(bubble_polygons)} bubble polygons")
223
 
224
+ # Debug: bubble shapes & boxes on the chunk image
 
 
 
 
 
225
  if debug:
226
+ dbg_path = visualize_all_debug(
227
+ chunk, [], bubble_polygons, bubble_boxes=bubble_boxes,
 
228
  step_name=f"chunk{ci}_bubbles", prefix="bubble_dbg"
229
  )
230
+ debug_files.append(dbg_path)
231
 
232
+ # ---- B) Mask-based OCR INSIDE bubble interiors (on the chunk) ----
233
+ print(f"πŸ“ Chunk {ci}: masked OCR inside bubble interiors...")
234
+ translations = extract_and_translate_with_masks(chunk, interior_polygons)
235
 
236
+ # Fallback: if masked OCR failed, run full OCR on the chunk
237
  if not translations:
238
+ print(f"⚠️ Chunk {ci}: masked OCR found no text β†’ fallback to full OCR")
239
  translations = extract_and_translate_chunk(chunk)
240
 
 
 
 
 
 
241
  print(f"⬆️ Chunk {ci}: OCR detections = {len(translations)}")
242
 
243
+ # Save original polygons for debug visualization
244
  for t in translations:
245
  t["original_polygon"] = t.get("polygon")
246
 
247
+ # ---- C) Bubble-aware matching (still in chunk-local coords) ----
248
+ if bubble_polygons:
249
+ print(f"πŸ”„ Chunk {ci}: matching OCR polygons to bubbles...")
250
+ translations = match_translations_to_bubbles(translations, bubble_polygons)
251
+ else:
252
+ print(f"⚠️ Chunk {ci}: no bubble polygons β†’ skip bubble matching")
253
 
254
+ # Debug: polygons after matching
255
  if debug:
256
+ dbg_path2 = visualize_all_debug(
257
+ chunk, translations, bubble_polygons, bubble_boxes=bubble_boxes,
 
258
  step_name=f"chunk{ci}_after_correction", prefix="bubble_dbg"
259
  )
260
+ debug_files.append(dbg_path2)
261
 
262
+ # ---- D) Render translated text onto THIS chunk ----
263
+ translated_chunk_img = chunk.copy()
264
 
265
  for t in translations:
266
+ translated_text = t.get("translated", "")
267
+ if not translated_text:
268
  continue
269
 
270
  bidx = t.get("matched_bubble_idx")
271
 
272
+ # Prefer the refined bubble polygon when we have a match
273
+ if bidx is not None and 0 <= bidx < len(bubble_polygons):
274
  render_poly = bubble_polygons[bidx]
275
  else:
276
+ # Fallback: use OCR polygon directly
277
+ render_poly = t.get("polygon")
278
+
279
+ if not render_poly:
280
+ continue
281
 
282
  translated_chunk_img = draw_translated_text_convex(
283
  translated_chunk_img,
284
  polygon_coords=render_poly,
285
+ text=translated_text,
286
  font_path=FONT_PATH,
287
  font_scale=1.0,
288
  original_polygon=t.get("original_polygon"),
289
+ bubble_polygon=bubble_polygons[bidx] if (bidx is not None and 0 <= bidx < len(bubble_polygons)) else None,
290
  )
291
 
292
+ # ---- E) Collect outputs from this chunk ----
 
 
 
 
 
 
293
  translated_chunks.append(translated_chunk_img)
294
 
295
+ for t in translations:
296
+ all_translations.append(t)
297
+ all_tables.append([t.get("original", ""), t.get("translated", "")])
298
 
299
  # -------------------------------------------------------
300
  # 3. Assemble HTML output (original & translated)
 
302
  orig_html = "".join([encode_image_to_html(c) for c in chunks])
303
  trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
304
 
305
+ # Keep API compatible: translations wrapped in a list
306
  return filename, orig_html, trans_html, all_tables, [all_translations], debug_files
307
 
308
  def split_image_into_chunks(img, num_chunks):