Spaces:

Curify
/

manga_translation

Sleeping

App Files Files Community

qqwjq1981 committed 29 days ago

Commit

11cf5cb

verified ·

1 Parent(s): c9c0e3f

Update utils/bubble_utils.py

Browse files

Files changed (1) hide show

utils/bubble_utils.py +43 -70

utils/bubble_utils.py CHANGED Viewed

@@ -15,7 +15,7 @@ from utils.polygon_utils import (
 )
 from utils.bubble_detect import detect_speech_bubbles_robust
 from utils.u2net_detector import detect_bubbles_u2net
-from utils.bubble_detect_rtdetr import detect_bubbles_rtdetr
 def bbox_to_polygon(bbox):
     """
@@ -78,124 +78,97 @@ def visualize_all_debug(img, translations, bubble_polygons, step_name="debug", p
 # ===================== Main Bubble Translation Pipeline ===================
 def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
     """
-    End-to-end bubble translation pipeline:
-      1. Detect speech bubbles
-      2. OCR full page
-      3. Correct OCR polygons using bubble polygons
-      4. Render translated text using corrected polygons
-      5. Split into chunks
     """
     # -------------------------------------------------------
-    # 1) Load image
     # -------------------------------------------------------
     filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
-    print(f"📄 bubble_pipeline_single: filename={filename}, size={full_img.size}")
     # -------------------------------------------------------
-    # 2) Bubble detection (RT-DETR comic model)
     # -------------------------------------------------------
-    detections = detect_bubbles_rtdetr(full_img)
-    bubble_boxes = [d["bbox"] for d in detections if d["class"] == 0]  # pure bubble shapes
-    bubble_text_boxes = [d["bbox"] for d in detections if d["class"] == 1]  # text inside bubble
-    free_text_boxes = [d["bbox"] for d in detections if d["class"] == 2]  # text outside bubble
-    # Convert bubble boxes → polygons for your polygon-based pipeline
-    bubble_polygons = [bbox_to_polygon(b) for b in bubble_boxes]
-    print(f"🔍 RT-DETR: {len(bubble_polygons)} bubbles, "
-          f"{len(bubble_text_boxes)} bubble-text regions, "
-          f"{len(free_text_boxes)} free-text regions")
     if debug:
-        visualize_all_debug(full_img, [], bubble_polygons,
-                            step_name="bubbles_only",
-                            prefix="bubble_dbg")
     # -------------------------------------------------------
-    # 3) OCR globally
     # -------------------------------------------------------
     translations = extract_and_translate_chunk(full_img)
-    print(f"📝 OCR found {len(translations)} text regions")
-    if len(translations) == 0:
-        print("⚠️ No OCR text detected → fallback")
         return fallback_empty(file_obj, num_chunks, full_img)
-    # SAVE ORIGINAL POLYGONS for debugging
     for t in translations:
-        if "polygon" in t:
-            t["original_polygon"] = t["polygon"]
     # -------------------------------------------------------
-    # 4) Correct OCR polygons using bubble polygons
     # -------------------------------------------------------
-    if len(bubble_polygons) > 0:
-        print(f"✨ Correcting OCR polygons using bubble strategy: {polygon_strategy}")
         translations = correct_ocr_polygons_with_bubbles(
-            translations,
-            bubble_polygons,
-            strategy=polygon_strategy,
         )
         matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
-        print(f"✅ Matched {matched}/{len(translations)} OCR regions to bubbles")
-    else:
-        print("⚠️ No bubble polygons detected → skipping polygon correction")
     if debug:
-        visualize_all_debug(full_img, translations, bubble_polygons,
-                            step_name="after_correction",
-                            prefix="bubble_dbg")
     # -------------------------------------------------------
-    # 5) Render translated text
     # -------------------------------------------------------
     translated_full = full_img.copy()
     for t in translations:
-        corrected_poly = t.get("polygon")
-        original_poly  = t.get("original_polygon")
         translated_text = t.get("translated", "")
-        if not corrected_poly or not translated_text:
             continue
-        # Get bubble polygon (if matched)
-        bubble_poly = None
-        idx = t.get("matched_bubble_idx")
-        if idx is not None and 0 <= idx < len(bubble_polygons):
-            bubble_poly = bubble_polygons[idx]
-        # Render polygon is slightly shrunk
-        render_poly = shrink_or_expand_polygon(corrected_poly, shrink_ratio=0.92)
         translated_full = draw_translated_text_convex(
             translated_full,
-            polygon_coords=corrected_poly,   # corrected
             text=translated_text,
             font_path=FONT_PATH,
             font_scale=1.0,
-            original_polygon=original_poly,  # RED
-            bubble_polygon=bubble_poly       # BLUE
         )
     # -------------------------------------------------------
     # 6) Split for UI
     # -------------------------------------------------------
     if num_chunks > 1:
-        _, _, chunks = load_and_split_image(file_obj, num_chunks)
-        translated_chunks = split_image_into_chunks(translated_full, num_chunks)
     else:
-        chunks = [full_img]
-        translated_chunks = [translated_full]
-    # -------------------------------------------------------
-    # 7) Return output
-    # -------------------------------------------------------
-    orig_html = "".join([encode_image_to_html(c) for c in chunks])
-    trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
     table_data = [[t["original"], t["translated"]] for t in translations]
     return filename, orig_html, trans_html, table_data, [translations]

 )
 from utils.bubble_detect import detect_speech_bubbles_robust
 from utils.u2net_detector import detect_bubbles_u2net
+from utils.bubble_detect_rtdetr import detect_and_refine_bubbles
 def bbox_to_polygon(bbox):
     """
 # ===================== Main Bubble Translation Pipeline ===================
 def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
     """
+    Manga bubble-aware translation pipeline using RT-DETR bubble detection.
+    Loads the full page, detects bubble polygons, OCRs and translates the
+    page, corrects OCR polygons against the bubbles, draws the translated
+    text, and finally splits original and translated pages for the UI.
+    Args:
+        file_obj: Image source passed to ``load_and_split_image``.
+        num_chunks: Number of chunks the original and translated pages are
+            split into for display; ``1`` keeps each page whole.
+        polygon_strategy: Forwarded as ``strategy`` to
+            ``correct_ocr_polygons_with_bubbles``.
+        debug: When true, calls ``visualize_all_debug`` after bubble
+            detection and again after polygon correction.
+    Returns:
+        ``(filename, orig_html, trans_html, table_data, [translations])``,
+        or whatever ``fallback_empty`` returns when OCR finds no text.
     """
     # -------------------------------------------------------
+    # 1) Load full image
     # -------------------------------------------------------
     filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
+    print(f"📄 bubble_pipeline_single: {filename}, size={full_img.size}")
     # -------------------------------------------------------
+    # 2) Detect & refine bubbles with RT-DETR
     # -------------------------------------------------------
+    bubble_polygons, interior_polygons = detect_and_refine_bubbles(full_img)
     if debug:
+        visualize_all_debug(
+            full_img, [], bubble_polygons,
+            step_name="bubbles_only", prefix="bubble_dbg"
+        )
     # -------------------------------------------------------
+    # 3) OCR full-page
     # -------------------------------------------------------
     translations = extract_and_translate_chunk(full_img)
+    print(f"📝 OCR found {len(translations)} regions")
+    if not translations:
         return fallback_empty(file_obj, num_chunks, full_img)
+    # save original polygon for visualization
     for t in translations:
+        t["original_polygon"] = t.get("polygon")
     # -------------------------------------------------------
+    # 4) Match OCR text regions with refined bubble polygons
     # -------------------------------------------------------
+    if bubble_polygons:
+        print("✨ Correcting OCR polygons using refined bubbles...")
         translations = correct_ocr_polygons_with_bubbles(
+            translations, bubble_polygons, strategy=polygon_strategy
         )
         matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
+        print(f"✅ Polygons matched to bubbles: {matched}/{len(translations)}")
     if debug:
+        visualize_all_debug(
+            full_img, translations, bubble_polygons,
+            step_name="after_correction", prefix="bubble_dbg"
+        )
     # -------------------------------------------------------
+    # 5) Render translated text using INTERIOR polygons
     # -------------------------------------------------------
     translated_full = full_img.copy()
     for t in translations:
+        idx = t.get("matched_bubble_idx")
         translated_text = t.get("translated", "")
+        if not translated_text:
             continue
+        # A negative or out-of-range index is treated as "no match": a
+        # negative value would silently select a polygon from the end of
+        # the list, and a too-large one would raise IndexError below.
+        if idx is not None and not 0 <= idx < len(bubble_polygons):
+            idx = None
+        if idx is not None and idx < len(interior_polygons):
+            render_poly = interior_polygons[idx]  # ✔ refined interior polygon
+        else:
+            # No usable interior polygon: fall back to the OCR polygon,
+            # slightly shrunk. Regions without any polygon cannot be drawn.
+            ocr_poly = t.get("polygon")
+            if ocr_poly is None or len(ocr_poly) == 0:
+                continue
+            render_poly = shrink_or_expand_polygon(ocr_poly, shrink_ratio=0.92)
         translated_full = draw_translated_text_convex(
             translated_full,
+            polygon_coords=render_poly,
             text=translated_text,
             font_path=FONT_PATH,
             font_scale=1.0,
+            original_polygon=t.get("original_polygon"),
+            bubble_polygon=bubble_polygons[idx] if idx is not None else None,
         )
     # -------------------------------------------------------
     # 6) Split for UI
     # -------------------------------------------------------
     if num_chunks > 1:
+        _, _, orig_chunks = load_and_split_image(file_obj, num_chunks)
+        trans_chunks = split_image_into_chunks(translated_full, num_chunks)
     else:
+        orig_chunks = [full_img]
+        trans_chunks = [translated_full]
+    orig_html = "".join([encode_image_to_html(c) for c in orig_chunks])
+    trans_html = "".join([encode_image_to_html(c) for c in trans_chunks])
+    # "translated" is optional in the render loop above, so read it the
+    # same way here instead of raising KeyError for untranslated regions.
+    table_data = [[t["original"], t.get("translated", "")] for t in translations]
     return filename, orig_html, trans_html, table_data, [translations]