Spaces:

Curify
/

manga_translation

Runtime error

App Files Files Community

qqwjq1981 committed on Dec 10, 2025

Commit

03501f8

verified ·

1 Parent(s): 5902acb

Update utils/bubble_utils.py

Browse files

Files changed (1) hide show

utils/bubble_utils.py +58 -62

utils/bubble_utils.py CHANGED Viewed

@@ -177,129 +177,124 @@ def visualize_all_debug(
     return out_path
# ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
    """
    Manga bubble-aware translation pipeline (CHUNK-BASED).

    Steps:
      - Split image into chunks vertically
      - For each chunk:
            bubble detection (RT-DETR) and bubble refinement
            mask-based OCR (falling back to full-chunk OCR when it finds nothing)
            bubble-aware polygon correction
            render translated text
      - Concatenate the translated chunks into the HTML preview

    Coordinates: bubble polygons/boxes (and fallback OCR polygons) are shifted
    by the chunk's vertical offset, so they are expressed in FULL-IMAGE space.
    OCR, debug plots and rendering therefore run against ``full_img``; the
    rendered page is cropped back to the chunk's band before it is collected.

    Args:
        file_obj: Input handed to ``load_and_split_image``.
        num_chunks: Number of chunks requested from ``load_and_split_image``.
        polygon_strategy: Accepted for interface compatibility; not read here.
        debug: When true, write debug visualizations and return their paths.

    Returns:
        Tuple ``(filename, orig_html, trans_html, all_tables,
        [all_translations], debug_files)`` -- same format as other pipelines.
        ``all_tables`` rows are ``[original, translated]``.
    """
    debug_files = []

    # -------------------------------------------------------
    # 1. Load & split image into chunks
    # -------------------------------------------------------
    filename, full_img, chunks = load_and_split_image(file_obj, num_chunks)
    print(f"📄 bubble_pipeline_single: {filename}, full size={full_img.size}, chunks={len(chunks)}")

    all_translations = []
    all_tables = []
    translated_chunks = []
    y_offset = 0   # track position of each chunk in the full image
    full_w = full_img.size[0]

    # -------------------------------------------------------
    # 2. Process each chunk independently
    # -------------------------------------------------------
    for ci, chunk in enumerate(chunks):
        print(f"\n================ CHUNK {ci} ================")
        chunk_h = chunk.size[1]

        # ---- A) Detect & Refine Bubbles in This Chunk ----
        # The interior polygons are returned but not consumed by this version.
        bubble_polygons, _interior_polygons, bubble_boxes = detect_and_refine_bubbles(chunk)
        print(f"🔍 Chunk {ci}: found {len(bubble_polygons)} bubble polygons")

        # Shift chunk-local geometry into full-image coordinates.
        bubble_polygons = [[(x, y + y_offset) for (x, y) in poly] for poly in bubble_polygons]
        bubble_boxes = [(x1, y1 + y_offset, x2, y2 + y_offset) for (x1, y1, x2, y2) in bubble_boxes]

        # ---- Debug plot: Bubble shapes only ----
        if debug:
            p = visualize_all_debug(
                full_img.copy(), [], bubble_polygons, bubble_boxes=bubble_boxes,
                step_name=f"chunk{ci}_bubbles", prefix="bubble_dbg"
            )
            debug_files.append(p)

        # ---- B) Mask-based OCR inside bubbles ----
        print(f"📝 Chunk {ci}: masked OCR...")
        translations = extract_and_translate_with_masks(full_img, normalize_bubble_regions(bubble_boxes))
        if not translations:
            print(f"⚠️ Chunk {ci}: falling back to full OCR")
            # Guard against a None result so the loops below stay safe.
            translations = extract_and_translate_chunk(chunk) or []
            # Fallback OCR ran on the chunk, so its polygons are chunk-local:
            # add the offset back.
            for t in translations:
                if t.get("polygon"):
                    t["polygon"] = [(x, y + y_offset) for (x, y) in t["polygon"]]
        print(f"⬆️ Chunk {ci}: OCR detections = {len(translations)}")

        # Record original polygons before matching rewrites them.
        for t in translations:
            t["original_polygon"] = t.get("polygon")

        # ---- C) Bubble-aware polygon matching ----
        translations = match_translations_to_bubbles(translations, bubble_polygons)

        # Debug: after-correction polygons
        if debug:
            p = visualize_all_debug(
                full_img.copy(), translations, bubble_polygons, bubble_boxes=bubble_boxes,
                step_name=f"chunk{ci}_after_correction", prefix="bubble_dbg"
            )
            debug_files.append(p)

        # ---- D) Render translated text inside bubble areas ----
        translated_chunk_img = full_img.copy()
        for t in translations:
            text = t.get("translated")
            if not text:
                continue

            # Resolve the matched bubble once, with a full bounds check: an
            # out-of-range index must not raise, and a negative index must not
            # silently wrap around to a different bubble.
            bidx = t.get("matched_bubble_idx")
            matched_bubble = None
            if bidx is not None and 0 <= bidx < len(bubble_polygons):
                matched_bubble = bubble_polygons[bidx]

            render_poly = matched_bubble if matched_bubble is not None else t.get("polygon")
            if not render_poly:
                # Nothing to draw into (no bubble match and no OCR polygon).
                continue

            translated_chunk_img = draw_translated_text_convex(
                translated_chunk_img,
                polygon_coords=render_poly,
                text=text,
                font_path=FONT_PATH,
                font_scale=1.0,
                original_polygon=t.get("original_polygon"),
                bubble_polygon=matched_bubble,
            )

        # Rendering happened on a copy of the FULL page. Keep only this chunk's
        # band; otherwise every entry of translated_chunks is a whole page and
        # the HTML preview repeats the page once per chunk.
        # NOTE(review): assumes chunks are full-width bands stacked top to
        # bottom (the y_offset bookkeeping already relies on this) and that the
        # renderer returns a PIL-style image -- confirm. With a single chunk
        # the band is the whole page, so the image is left untouched.
        if len(chunks) > 1:
            translated_chunk_img = translated_chunk_img.crop(
                (0, y_offset, full_w, y_offset + chunk_h)
            )

        # Save translations and table rows; tolerate entries missing a key.
        all_translations.extend(translations)
        all_tables.extend(
            [t.get("original", ""), t.get("translated", "")] for t in translations
        )
        translated_chunks.append(translated_chunk_img)

        # Move offset down to next chunk
        y_offset += chunk_h

    # -------------------------------------------------------
    # 3. Assemble HTML output (original & translated)
    # -------------------------------------------------------
    orig_html = "".join([encode_image_to_html(c) for c in chunks])
    trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
    return filename, orig_html, trans_html, all_tables, [all_translations], debug_files
 def split_image_into_chunks(img, num_chunks):

     return out_path
# ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
    """
    Translate manga speech bubbles one chunk at a time.

    The input is loaded and split by ``load_and_split_image``; each chunk is
    then handled on its own:
        1. bubble detection + refinement (``detect_and_refine_bubbles``)
        2. masked OCR/translation on the bubble interior polygons, with a
           full-chunk OCR fallback when that yields nothing
        3. matching of OCR results to bubble polygons (skipped when the chunk
           has no bubble polygons)
        4. rendering of the translated text onto a copy of the chunk

    NOTE:
      * Every polygon stays in CHUNK-LOCAL coordinates; nothing is shifted
        into full-image space.
      * Per the original note, the manual-edit pipeline isn't wired for
        Bubble mode, so local coordinates are acceptable here.
      * ``polygon_strategy`` is accepted but not read by this function.

    Returns:
        ``(filename, orig_html, trans_html, all_tables, [all_translations],
        debug_files)`` where ``all_tables`` holds ``[original, translated]``
        rows and the translations list is wrapped in an outer list.
    """
    debug_files = []
    all_translations = []   # flat, across all chunks
    all_tables = []         # [[original, translated], ...]
    translated_chunks = []  # one rendered image per chunk

    # -------------------------------------------------------
    # 1. Load the image and split it into chunks
    # -------------------------------------------------------
    filename, full_img, chunks = load_and_split_image(file_obj, num_chunks)
    print(f"📄 bubble_pipeline_single: {filename}, full size={full_img.size}, chunks={len(chunks)}")

    # -------------------------------------------------------
    # 2. Run the per-chunk pipeline
    # -------------------------------------------------------
    for ci, chunk in enumerate(chunks):
        print(f"\n================ CHUNK {ci} ================")
        width, height = chunk.size
        print(f"   Chunk size: {width}x{height}")

        # ---- A) Bubble detection / refinement, in chunk-local coords ----
        bubble_polygons, interior_polygons, bubble_boxes = detect_and_refine_bubbles(chunk)
        print(f"🔍 Chunk {ci}: found {len(bubble_polygons)} bubble polygons")

        if debug:
            # Bubble outlines and boxes only, drawn over the chunk.
            debug_files.append(
                visualize_all_debug(
                    chunk, [], bubble_polygons, bubble_boxes=bubble_boxes,
                    step_name=f"chunk{ci}_bubbles", prefix="bubble_dbg",
                )
            )

        # ---- B) Masked OCR on the interiors, full-chunk OCR as fallback ----
        print(f"📝 Chunk {ci}: masked OCR inside bubble interiors...")
        translations = extract_and_translate_with_masks(chunk, interior_polygons)
        if not translations:
            print(f"⚠️ Chunk {ci}: masked OCR found no text → fallback to full OCR")
            translations = extract_and_translate_chunk(chunk)
        print(f"⬆️ Chunk {ci}: OCR detections = {len(translations)}")

        # Remember the pre-matching polygon of every detection.
        for entry in translations:
            entry["original_polygon"] = entry.get("polygon")

        # ---- C) Match detections to bubbles (needs at least one bubble) ----
        if not bubble_polygons:
            print(f"⚠️ Chunk {ci}: no bubble polygons → skip bubble matching")
        else:
            print(f"🔄 Chunk {ci}: matching OCR polygons to bubbles...")
            translations = match_translations_to_bubbles(translations, bubble_polygons)

        if debug:
            # Same overlay again, now including the matched detections.
            debug_files.append(
                visualize_all_debug(
                    chunk, translations, bubble_polygons, bubble_boxes=bubble_boxes,
                    step_name=f"chunk{ci}_after_correction", prefix="bubble_dbg",
                )
            )

        # ---- D) Draw the translations onto a copy of the chunk ----
        canvas = chunk.copy()
        for entry in translations:
            translated_text = entry.get("translated", "")
            if not translated_text:
                continue

            bidx = entry.get("matched_bubble_idx")
            has_bubble = bidx is not None and 0 <= bidx < len(bubble_polygons)
            # A matched bubble polygon wins; otherwise use the OCR polygon.
            render_poly = bubble_polygons[bidx] if has_bubble else entry.get("polygon")
            if not render_poly:
                continue

            canvas = draw_translated_text_convex(
                canvas,
                polygon_coords=render_poly,
                text=translated_text,
                font_path=FONT_PATH,
                font_scale=1.0,
                original_polygon=entry.get("original_polygon"),
                bubble_polygon=render_poly if has_bubble else None,
            )

        # ---- E) Collect this chunk's results ----
        translated_chunks.append(canvas)
        all_translations.extend(translations)
        all_tables.extend(
            [entry.get("original", ""), entry.get("translated", "")]
            for entry in translations
        )

    # -------------------------------------------------------
    # 3. Assemble HTML output (original & translated)
    # -------------------------------------------------------
    orig_html = "".join(encode_image_to_html(piece) for piece in chunks)
    trans_html = "".join(encode_image_to_html(piece) for piece in translated_chunks)

    # The translations list stays wrapped in an outer list for API compatibility.
    return filename, orig_html, trans_html, all_tables, [all_translations], debug_files
 def split_image_into_chunks(img, num_chunks):