Spaces:

Curify
/

manga_translation

Runtime error

App Files Files Community

qqwjq1981 committed on Dec 8, 2025

Commit

0bd8fa6

verified ·

1 Parent(s): 51254b6

Update utils/bubble_utils.py

Browse files

Files changed (1) hide show

utils/bubble_utils.py +54 -29

utils/bubble_utils.py CHANGED Viewed

@@ -62,44 +62,52 @@ def visualize_all_debug(img, translations, bubble_polygons, step_name="debug", p
 # ===================== Main Bubble Translation Pipeline ===================
 def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
     """
     End-to-end bubble translation pipeline:
-      1. Global bubble detection
-      2. OCR text extraction
-      3. Correct OCR polygons using bubbles
-      4. Inpaint + redraw translated text
-      5. Split into chunks for UI
-    Args:
-        file_obj: Uploaded file object or path
-        num_chunks: #chunks for UI display
-        polygon_strategy: "hybrid", "bubble", "intersect", "expand"
-        debug: if True, saves debug overlay PNGs
     """
-    # 1) Load full page
     filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
     print(f"📄 bubble_pipeline_single: filename={filename}, size={full_img.size}")
     # 2) Robust bubble detection
     bubble_polygons = detect_speech_bubbles_robust(full_img, min_area=400)
-    print(f"🔍 Detected {len(bubble_polygons)} speech bubbles (robust)")
     if debug:
-        visualize_all_debug(full_img, [], bubble_polygons, step_name="bubbles_only", prefix="bubble_dbg")
     # 3) OCR globally
     translations = extract_and_translate_chunk(full_img)
     print(f"📝 OCR found {len(translations)} text regions")
     if len(translations) == 0:
-        print("⚠️ OCR found no text → fallback_empty")
         return fallback_empty(file_obj, num_chunks, full_img)
-    # 4) Correct OCR polygons if bubbles found
     if len(bubble_polygons) > 0:
-        print(f"✨ Correcting OCR polygons using bubbles (strategy={polygon_strategy})")
         translations = correct_ocr_polygons_with_bubbles(
             translations,
             bubble_polygons,
@@ -108,32 +116,48 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
         matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
         print(f"✅ Matched {matched}/{len(translations)} OCR regions to bubbles")
     else:
-        print("⚠️ No bubbles detected → using original OCR polygons")
     if debug:
-        visualize_all_debug(full_img, translations, bubble_polygons, step_name="after_correction", prefix="bubble_dbg")
-    # 5) Render translated text on a copy of the full page
     translated_full = full_img.copy()
     for t in translations:
-        polygon = t.get("polygon")
         translated_text = t.get("translated", "")
-        if not polygon or not translated_text:
             continue
-        render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)
         translated_full = draw_translated_text_convex(
             translated_full,
-            render_poly,
-            translated_text,
             font_path=FONT_PATH,
             font_scale=1.0,
         )
-    # 6) Split original and translated images into chunks for UI
     if num_chunks > 1:
         _, _, chunks = load_and_split_image(file_obj, num_chunks)
         translated_chunks = split_image_into_chunks(translated_full, num_chunks)
@@ -141,7 +165,9 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
         chunks = [full_img]
         translated_chunks = [translated_full]
-    # 7) Convert to HTML
     orig_html = "".join([encode_image_to_html(c) for c in chunks])
     trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
@@ -149,7 +175,6 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
     return filename, orig_html, trans_html, table_data, [translations]
 def split_image_into_chunks(img, num_chunks):
     """
     Simple vertical splitting for the translated image.

 # ===================== Main Bubble Translation Pipeline ===================
 def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
     """
     End-to-end bubble translation pipeline:
+      1. Detect speech bubbles
+      2. OCR full page
+      3. Correct OCR polygons using bubble polygons
+      4. Render translated text using corrected polygons
+      5. Split into chunks
     """
+    # -------------------------------------------------------
+    # 1) Load image
+    # -------------------------------------------------------
     filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
     print(f"📄 bubble_pipeline_single: filename={filename}, size={full_img.size}")
+    # -------------------------------------------------------
     # 2) Robust bubble detection
+    # -------------------------------------------------------
     bubble_polygons = detect_speech_bubbles_robust(full_img, min_area=400)
+    print(f"🔍 Detected {len(bubble_polygons)} speech bubbles")
     if debug:
+        visualize_all_debug(full_img, [], bubble_polygons,
+                            step_name="bubbles_only",
+                            prefix="bubble_dbg")
+    # -------------------------------------------------------
     # 3) OCR globally
+    # -------------------------------------------------------
     translations = extract_and_translate_chunk(full_img)
     print(f"📝 OCR found {len(translations)} text regions")
     if len(translations) == 0:
+        print("⚠️ No OCR text detected → fallback")
         return fallback_empty(file_obj, num_chunks, full_img)
+    # SAVE ORIGINAL POLYGONS for debugging
+    for t in translations:
+        if "polygon" in t:
+            t["original_polygon"] = t["polygon"]
+    # -------------------------------------------------------
+    # 4) Correct OCR polygons using bubble polygons
+    # -------------------------------------------------------
     if len(bubble_polygons) > 0:
+        print(f"✨ Correcting OCR polygons using bubble strategy: {polygon_strategy}")
         translations = correct_ocr_polygons_with_bubbles(
             translations,
             bubble_polygons,
         matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
         print(f"✅ Matched {matched}/{len(translations)} OCR regions to bubbles")
     else:
+        print("⚠️ No bubble polygons detected → skipping polygon correction")
     if debug:
+        visualize_all_debug(full_img, translations, bubble_polygons,
+                            step_name="after_correction",
+                            prefix="bubble_dbg")
+    # -------------------------------------------------------
+    # 5) Render translated text
+    # -------------------------------------------------------
     translated_full = full_img.copy()
     for t in translations:
+        corrected_poly = t.get("polygon")
+        original_poly  = t.get("original_polygon")
         translated_text = t.get("translated", "")
+        if not corrected_poly or not translated_text:
             continue
+        # Get bubble polygon (if matched)
+        bubble_poly = None
+        idx = t.get("matched_bubble_idx")
+        if idx is not None and 0 <= idx < len(bubble_polygons):
+            bubble_poly = bubble_polygons[idx]
+        # Render polygon is slightly shrunk
+        render_poly = shrink_or_expand_polygon(corrected_poly, shrink_ratio=0.92)
         translated_full = draw_translated_text_convex(
             translated_full,
+            polygon_coords=corrected_poly,   # corrected
+            text=translated_text,
             font_path=FONT_PATH,
             font_scale=1.0,
+            original_polygon=original_poly,  # RED
+            bubble_polygon=bubble_poly       # BLUE
         )
+    # -------------------------------------------------------
+    # 6) Split for UI
+    # -------------------------------------------------------
     if num_chunks > 1:
         _, _, chunks = load_and_split_image(file_obj, num_chunks)
         translated_chunks = split_image_into_chunks(translated_full, num_chunks)
         chunks = [full_img]
         translated_chunks = [translated_full]
+    # -------------------------------------------------------
+    # 7) Return output
+    # -------------------------------------------------------
     orig_html = "".join([encode_image_to_html(c) for c in chunks])
     trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
     return filename, orig_html, trans_html, table_data, [translations]
 def split_image_into_chunks(img, num_chunks):
     """
     Simple vertical splitting for the translated image.