Spaces:

Curify
/

manga_translation

Runtime error

App Files Community

qqwjq1981 committed on Dec 5, 2025

Commit

a263a61

verified ·

1 Parent(s): e44cb9e

Update utils/bubble_utils.py

Browse files

Files changed (1) hide show

utils/bubble_utils.py +120 -40

utils/bubble_utils.py CHANGED Viewed

@@ -1,20 +1,34 @@
 import numpy as np
 from PIL import Image
 from utils.image_utils import load_and_split_image
 from utils.ocr_utils import extract_and_translate_chunk
-from utils.polygon_utils import draw_translated_text_convex, shrink_or_expand_polygon, FONT_PATH
 from utils.bubble_detect import detect_speech_bubbles
 from utils.image_utils import encode_image_to_html
-def bubble_pipeline_single(file_obj, num_chunks=1):
     """
-    End-to-end bubble translation pipeline:
     1. Global bubble detection
     2. OCR text extraction
-    3. Robust bubble-text matching
-    4. Inpaint + redraw inside each speech bubble
-    5. Assign bubbles to chunks (if panel split)
     """
     # ----------------------------------------------------------------------
@@ -27,49 +41,55 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
     # 2. Global speech bubble detection
     # ----------------------------------------------------------------------
     bubble_polygons = detect_speech_bubbles(full_img)
-    if len(bubble_polygons) == 0:
-        print("⚠️ No bubbles detected → FALLBACK to OCR-only pipeline.")
-        return fallback_ocr_pipeline(file_obj, num_chunks)
     # ----------------------------------------------------------------------
     # 3. OCR detection (global)
     # ----------------------------------------------------------------------
     translations = extract_and_translate_chunk(full_img)
     if len(translations) == 0:
-        print("⚠️ OCR found no text → fallback")
-        return fallback_ocr_pipeline(file_obj, num_chunks)
     # ----------------------------------------------------------------------
-    # 4. Assign each text box to its closest bubble
     # ----------------------------------------------------------------------
-    bubble_centers = [np.mean(poly, axis=0) for poly in bubble_polygons]
-    assignment = []  # (text, bubble_poly)
-    for t in translations:
-        text_center = np.mean(t["polygon"], axis=0)
-        dists = [np.linalg.norm(text_center - c) for c in bubble_centers]
-        best_idx = int(np.argmin(dists))
-        assignment.append((t, bubble_polygons[best_idx]))
     # ----------------------------------------------------------------------
     # 5. Render onto a working copy of full image
     # ----------------------------------------------------------------------
     translated_full = full_img.copy()
-    for t, bubble_poly in assignment:
-        translated_text = t["translated"]
-        # slightly shrink inside the bubble
-        inner_poly = shrink_or_expand_polygon(bubble_poly, shrink_ratio=0.92)
         translated_full = draw_translated_text_convex(
             translated_full,
-            inner_poly,
             translated_text,
-            font_path=FONT_PATH,    # uses default inside polygon_utils
             font_scale=1.0
         )
@@ -78,13 +98,9 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
     # ----------------------------------------------------------------------
     if num_chunks > 1:
         _, _, chunks = load_and_split_image(file_obj, num_chunks)
-        translated_chunks = []
-        w, h = full_img.size
-        _, _, trans_chunks = load_and_split_image_image_obj(
-            translated_full, num_chunks
-        )
-        translated_chunks = trans_chunks
     else:
         chunks = [full_img]
         translated_chunks = [translated_full]
@@ -95,18 +111,33 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
     orig_html = "".join([encode_image_to_html(c) for c in chunks])
     trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
-    # table for manual edit
     table_data = [[t["original"], t["translated"]] for t in translations]
     return filename, orig_html, trans_html, table_data, [translations]
-# ========================================================================
-# Fallback OCR Pipeline
-# ========================================================================
-from utils.polygon_utils import render_translated_chunk
 def fallback_ocr_pipeline(file_obj, num_chunks):
     """
@@ -134,3 +165,52 @@ def fallback_ocr_pipeline(file_obj, num_chunks):
     trans = "".join([encode_image_to_html(t) for t in translated_images])
     return filename, orig, trans, all_tables, all_translations

+"""
+Enhanced bubble detection pipeline with polygon correction
+"""
 import numpy as np
 from PIL import Image
 from utils.image_utils import load_and_split_image
 from utils.ocr_utils import extract_and_translate_chunk
+from utils.polygon_utils import (
+    draw_translated_text_convex,
+    shrink_or_expand_polygon,
+    FONT_PATH,
+    correct_ocr_polygons_with_bubbles,
+    render_translated_chunk
+)
 from utils.bubble_detect import detect_speech_bubbles
 from utils.image_utils import encode_image_to_html
+def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid"):
     """
+    End-to-end bubble translation pipeline with polygon correction:
     1. Global bubble detection
     2. OCR text extraction
+    3. Correct OCR polygons using detected bubbles
+    4. Inpaint + redraw inside corrected polygons
+    5. Split into chunks if needed
+    Args:
+        file_obj: Input image file
+        num_chunks: Number of panels to split into
+        polygon_strategy: How to correct polygons ("hybrid", "bubble", "intersect", "expand")
     """
     # ----------------------------------------------------------------------
     # 2. Global speech bubble detection
     # ----------------------------------------------------------------------
     bubble_polygons = detect_speech_bubbles(full_img)
+    print(f"🔍 Detected {len(bubble_polygons)} speech bubbles")
     # ----------------------------------------------------------------------
     # 3. OCR detection (global)
     # ----------------------------------------------------------------------
     translations = extract_and_translate_chunk(full_img)
+    print(f"📝 OCR found {len(translations)} text regions")
     if len(translations) == 0:
+        print("⚠️ OCR found no text → showing original image")
+        return fallback_empty(file_obj, num_chunks, full_img)
     # ----------------------------------------------------------------------
+    # 4. Correct OCR polygons using bubble detection
     # ----------------------------------------------------------------------
+    if len(bubble_polygons) > 0:
+        print(f"✨ Correcting OCR polygons using bubble detection (strategy: {polygon_strategy})")
+        translations = correct_ocr_polygons_with_bubbles(
+            translations,
+            bubble_polygons,
+            strategy=polygon_strategy
+        )
+        # Stats
+        matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
+        print(f"✅ Matched {matched}/{len(translations)} text regions to bubbles")
+    else:
+        print("⚠️ No bubbles detected → using original OCR polygons")
     # ----------------------------------------------------------------------
     # 5. Render onto a working copy of full image
     # ----------------------------------------------------------------------
     translated_full = full_img.copy()
+    for t in translations:
+        polygon = t.get("polygon")
+        translated_text = t.get("translated", "")
+        if not polygon or not translated_text:
+            continue
+        # Slightly shrink for better visual appearance
+        render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)
         translated_full = draw_translated_text_convex(
             translated_full,
+            render_poly,
             translated_text,
+            font_path=FONT_PATH,
             font_scale=1.0
         )
     # ----------------------------------------------------------------------
     if num_chunks > 1:
         _, _, chunks = load_and_split_image(file_obj, num_chunks)
+        # Split translated image the same way
+        translated_chunks = split_image_into_chunks(translated_full, num_chunks)
     else:
         chunks = [full_img]
         translated_chunks = [translated_full]
     orig_html = "".join([encode_image_to_html(c) for c in chunks])
     trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
+    # Table for manual edit
     table_data = [[t["original"], t["translated"]] for t in translations]
     return filename, orig_html, trans_html, table_data, [translations]
+def split_image_into_chunks(img, num_chunks):  # returns a list of full-width crops ordered top to bottom
+    """Split PIL Image vertically into equal chunks"""
+    if num_chunks <= 1:  # nothing to split: the image itself is returned, uncropped
+        return [img]
+    width, height = img.size
+    chunk_height = height // num_chunks  # floor division; evaluates to 0 when num_chunks > height
+    chunks = []
+    for i in range(num_chunks):
+        top = i * chunk_height
+        bottom = height if i == num_chunks - 1 else (i + 1) * chunk_height  # last chunk absorbs the rows left over by the floor division
+        chunk = img.crop((0, top, width, bottom))  # box is (left, upper, right, lower): a full-width horizontal band
+        chunks.append(chunk)
+    return chunks  # NOTE(review): boundaries presumably mirror load_and_split_image, which is not visible here; confirm
+# ========================================================================
+# Fallback Pipelines
+# ========================================================================
 def fallback_ocr_pipeline(file_obj, num_chunks):
     """
     trans = "".join([encode_image_to_html(t) for t in translated_images])
     return filename, orig, trans, all_tables, all_translations
+def fallback_empty(file_obj, num_chunks, full_img):  # NOTE(review): full_img is accepted but never read in this body
+    """Fallback when no text is detected"""
+    filename, _, chunks = load_and_split_image(file_obj, num_chunks)  # second returned value is discarded
+    orig = "".join([encode_image_to_html(c) for c in chunks])  # concatenated HTML for every original chunk
+    trans = orig  # No translation to show
+    return filename, orig, trans, [], [[]]  # same 5-item layout as bubble_pipeline_single's return: empty table, one empty translation list
+# ========================================================================
+# Debug/Visualization Utilities
+# ========================================================================
+def visualize_polygon_correction(img, translations, bubble_polygons, output_path=None):  # returns the annotated copy; saving to disk is optional
+    """
+    Create debug visualization showing:
+    - Original OCR polygons in red
+    - Detected bubbles in blue
+    - Corrected polygons in green
+    """
+    from PIL import ImageDraw
+    debug_img = img.copy()  # draw on a copy so the caller's image is left untouched
+    draw = ImageDraw.Draw(debug_img, 'RGBA')  # per Pillow docs, 'RGBA' drawing mode blends alpha colours into an RGB image; assumes img is RGB (TODO confirm)
+    # Draw bubbles in blue
+    for bubble in bubble_polygons:
+        draw.polygon(bubble, outline=(0, 0, 255, 128), width=2)  # NOTE(review): assumes each polygon is in a coordinate format Pillow accepts; verify
+    # Draw OCR polygons
+    for t in translations:
+        orig_poly = t.get("original_polygon")  # presumably set by correct_ocr_polygons_with_bubbles; verify against polygon_utils
+        corrected_poly = t.get("polygon")
+        # Original in red
+        if orig_poly:  # NOTE(review): truthiness test raises ValueError for a multi-element numpy array; confirm polygons are plain lists
+            draw.polygon(orig_poly, outline=(255, 0, 0, 128), width=2)
+        # Corrected in green
+        if corrected_poly:  # drawn after the red outline, so green sits on top where they overlap
+            draw.polygon(corrected_poly, outline=(0, 255, 0, 192), width=3)
+    if output_path:  # None or an empty string skips the save
+        debug_img.save(output_path)
+    return debug_img