Spaces:

Curify
/

manga_translation

Sleeping

App Files Files Community

qqwjq1981 committed on Dec 5, 2025

Commit

e44cb9e

verified ·

1 Parent(s): 2d1d7b4

Update utils/bubble_detect.py

Browse files

Files changed (1) hide show

utils/bubble_detect.py +197 -187

utils/bubble_detect.py CHANGED Viewed

@@ -1,216 +1,226 @@
 """
-Enhanced bubble detection pipeline with polygon correction
 """
 import numpy as np
-from PIL import Image
-from utils.image_utils import load_and_split_image
-from utils.ocr_utils import extract_and_translate_chunk
-from utils.polygon_utils import (
-    draw_translated_text_convex,
-    shrink_or_expand_polygon,
-    FONT_PATH,
-    correct_ocr_polygons_with_bubbles,
-    render_translated_chunk
-)
-from utils.bubble_detect import detect_speech_bubbles
-from utils.image_utils import encode_image_to_html
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid"):
    """
    Translate one page: detect bubbles, OCR + translate, redraw, then chunk.

    Steps:
    1. Load the full page without splitting it.
    2. Detect speech bubbles on the whole page.
    3. Run OCR + translation on the whole page.
    4. If any bubbles were found, correct the OCR polygons against them.
    5. Draw each translated string into its (slightly shrunk) polygon.
    6. Split the original and the translated page into ``num_chunks`` pieces.
    7. Encode both sets of pieces as HTML and build the manual-edit table.

    Args:
        file_obj: Input image file, forwarded to ``load_and_split_image``.
        num_chunks: Number of panels to split into
        polygon_strategy: How to correct polygons ("hybrid", "bubble", "intersect", "expand")

    Returns:
        Tuple ``(filename, orig_html, trans_html, table_data, [translations])``
        where ``table_data`` is a list of ``[original, translated]`` rows.
        When OCR finds nothing, the result of ``fallback_empty`` is returned
        instead.
    """
    # 1. Load the full image (no splitting yet).
    filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)

    # 2. Global speech bubble detection.
    bubble_polygons = detect_speech_bubbles(full_img)
    print(f"🔍 Detected {len(bubble_polygons)} speech bubbles")

    # 3. Global OCR + translation.
    translations = extract_and_translate_chunk(full_img)
    print(f"📝 OCR found {len(translations)} text regions")
    if len(translations) == 0:
        print("⚠️ OCR found no text → showing original image")
        return fallback_empty(file_obj, num_chunks, full_img)

    # 4. Correct the OCR polygons with the detected bubbles, when there are any.
    if len(bubble_polygons) > 0:
        print(f"✨ Correcting OCR polygons using bubble detection (strategy: {polygon_strategy})")
        translations = correct_ocr_polygons_with_bubbles(
            translations,
            bubble_polygons,
            strategy=polygon_strategy
        )
        matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
        print(f"✅ Matched {matched}/{len(translations)} text regions to bubbles")
    else:
        print("⚠️ No bubbles detected → using original OCR polygons")

    # 5. Render onto a working copy so the original page stays untouched.
    translated_full = full_img.copy()
    for t in translations:
        polygon = t.get("polygon")
        translated_text = t.get("translated", "")
        if not polygon or not translated_text:
            continue
        # Slightly shrink for better visual appearance.
        render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)
        translated_full = draw_translated_text_convex(
            translated_full,
            render_poly,
            translated_text,
            font_path=FONT_PATH,
            font_scale=1.0
        )

    # 6. Split into chunks only when more than one was requested.
    if num_chunks > 1:
        # NOTE(review): the original chunks come from load_and_split_image
        # while the translated ones come from split_image_into_chunks; this
        # assumes both cut at the same rows - confirm against image_utils.
        _, _, chunks = load_and_split_image(file_obj, num_chunks)
        translated_chunks = split_image_into_chunks(translated_full, num_chunks)
    else:
        chunks = [full_img]
        translated_chunks = [translated_full]

    # 7. Convert for HTML.
    orig_html = "".join(encode_image_to_html(c) for c in chunks)
    trans_html = "".join(encode_image_to_html(t) for t in translated_chunks)

    # Table for manual edit. Use .get like the render loop above, so an entry
    # that was skipped for lacking a translation cannot raise KeyError here.
    table_data = [[t.get("original", ""), t.get("translated", "")] for t in translations]
    return filename, orig_html, trans_html, table_data, [translations]
def split_image_into_chunks(img, num_chunks):
    """
    Split an image into horizontal bands, ordered top to bottom.

    Args:
        img: Image exposing ``size`` as ``(width, height)`` and
            ``crop((left, top, right, bottom))`` (e.g. a PIL Image).
        num_chunks: Requested number of bands.

    Returns:
        List of cropped bands. Every band is ``height // n`` rows tall except
        the last, which also takes the remainder rows. ``[img]`` is returned
        unchanged when one band or fewer is requested, or when the image is
        at most one row tall.
    """
    if num_chunks <= 1:
        return [img]
    width, height = img.size
    # More bands than rows would make the band height 0 and yield empty crops,
    # so never cut finer than one row per band.
    num_chunks = min(num_chunks, height)
    if num_chunks <= 1:
        return [img]
    chunk_height = height // num_chunks
    # Row boundaries; the final one is the true height so no row is lost.
    edges = [i * chunk_height for i in range(num_chunks)] + [height]
    return [img.crop((0, top, width, bottom)) for top, bottom in zip(edges, edges[1:])]
-# ========================================================================
-# Fallback Pipelines
-# ========================================================================
def fallback_ocr_pipeline(file_obj, num_chunks):
    """
    Plain OCR translation pipeline that works chunk by chunk.

    The image is split first; each chunk is then OCR-translated and
    re-rendered on its own, without any bubble detection. The original
    docstring describes this as the fallback for pages where bubble detection
    fails or finds nothing; no caller is visible in this file.

    Args:
        file_obj: Input image file, forwarded to ``load_and_split_image``.
        num_chunks: Number of chunks to split the image into.

    Returns:
        Tuple ``(filename, orig_html, trans_html, table_rows, per_chunk)``
        where ``table_rows`` is one flat list of ``[original, translated]``
        pairs and ``per_chunk`` holds one translation list per chunk.
    """
    filename, _full, chunks = load_and_split_image(file_obj, num_chunks)

    per_chunk = []
    table_rows = []
    rendered = []
    for piece in chunks:
        entries = extract_and_translate_chunk(piece)
        per_chunk.append(entries)
        table_rows.extend([e["original"], e["translated"]] for e in entries)
        rendered.append(render_translated_chunk(piece, entries))

    orig_html = "".join(map(encode_image_to_html, chunks))
    trans_html = "".join(map(encode_image_to_html, rendered))
    return filename, orig_html, trans_html, table_rows, per_chunk
def fallback_empty(file_obj, num_chunks, full_img):
    """
    Build the pipeline result for a page on which no text was detected.

    The split original is shown in both the "original" and "translated"
    slots, with an empty edit table and a single empty translation list.

    Args:
        file_obj: Input image file, forwarded to ``load_and_split_image``.
        num_chunks: Number of chunks to split the image into.
        full_img: Not used by this function; kept so existing callers work.

    Returns:
        Tuple ``(filename, html, html, [], [[]])``.
    """
    filename, _, pieces = load_and_split_image(file_obj, num_chunks)
    html = "".join(map(encode_image_to_html, pieces))
    # Nothing was translated, so the same markup fills both slots.
    return filename, html, html, [], [[]]
-# ========================================================================
-# Debug/Visualization Utilities
-# ========================================================================
def visualize_polygon_correction(img, translations, bubble_polygons, output_path=None):
    """
    Draw a debug overlay of the polygon-correction stage on a copy of ``img``.

    Outline colours:
    - Detected bubbles in blue
    - Original OCR polygons (``"original_polygon"``) in red
    - Corrected polygons (``"polygon"``) in green, with a thicker stroke

    Args:
        img: Image to annotate; it is copied, not modified.
        translations: Iterable of dicts that may carry ``"original_polygon"``
            and ``"polygon"`` entries.
        bubble_polygons: Iterable of bubble polygons.
        output_path: When truthy, the overlay is also saved to this path.

    Returns:
        The annotated copy.
    """
    from PIL import ImageDraw

    canvas = img.copy()
    pen = ImageDraw.Draw(canvas, 'RGBA')

    # Bubbles go first so the OCR outlines are painted on top of them.
    for outline in bubble_polygons:
        pen.polygon(outline, outline=(0, 0, 255, 128), width=2)

    # (dict key, RGBA outline colour, stroke width), in drawing order.
    layers = (
        ("original_polygon", (255, 0, 0, 128), 2),
        ("polygon", (0, 255, 0, 192), 3),
    )
    for entry in translations:
        for key, colour, stroke in layers:
            shape = entry.get(key)
            if shape:
                pen.polygon(shape, outline=colour, width=stroke)

    if output_path:
        canvas.save(output_path)
    return canvas

 """
+Enhanced speech bubble detection for manga
 """
+import cv2
 import numpy as np
+from shapely.geometry import Polygon
def detect_speech_bubbles(img_pil, min_area=500, max_area=None, debug=False):
    """
    Detect candidate speech bubbles in a manga image.

    The page is adaptively thresholded, inverted, cleaned with a morphological
    close followed by an open, and its external contours are then filtered by
    area and bounding-box aspect ratio. Each surviving contour is simplified
    to a polygon.

    Args:
        img_pil: PIL Image in any mode (it is converted to RGB internally).
            An array-like without ``convert`` is also accepted and is assumed
            to be 8-bit grayscale or RGB(A).
        min_area: Minimum contour area in pixels
        max_area: Maximum contour area (None = 1/4 of image)
        debug: If True, also return per-bubble debug info

    Returns:
        List of bubble polygons ``[(x, y), ...]``. When ``debug`` is True, a
        tuple ``(bubbles, debug_info)`` where ``debug_info`` has one dict per
        bubble with ``area``, ``aspect_ratio``, ``circularity``, ``vertices``
        and ``bbox``.
    """
    # Normalise the colour mode first. Grayscale ("L") and palette ("P") pages
    # are common for manga and a fixed RGB->BGR conversion cannot take them.
    rgb = img_pil.convert("RGB") if hasattr(img_pil, "convert") else img_pil
    pixels = np.array(rgb)
    gray = pixels if pixels.ndim == 2 else cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    h, w = gray.shape
    if max_area is None:
        max_area = (h * w) // 4  # Max 1/4 of image

    # Adaptive threshold handles varying lighting better than a global one.
    th = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        35, 10
    )
    # Invert so pixels that fell below their local threshold become white (255).
    inv = 255 - th

    # Close small gaps in the white regions.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    cleaned = cv2.morphologyEx(inv, cv2.MORPH_CLOSE, kernel, iterations=2)
    # Remove small noise.
    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, kernel_open, iterations=1)

    contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bubbles = []
    debug_info = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < min_area or area > max_area:
            continue

        x, y, bw, bh = cv2.boundingRect(cnt)
        # Too thin/wide = not a bubble. The +1 keeps the ratio finite.
        aspect_ratio = max(bw, bh) / (min(bw, bh) + 1)
        if aspect_ratio > 5:
            continue

        # Simplify the contour, with a tolerance of 1% of its perimeter.
        perimeter = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.01 * perimeter, True)
        poly = [(int(p[0][0]), int(p[0][1])) for p in approx]
        bubbles.append(poly)

        if debug:
            # Circularity (1.0 for a perfect circle) is reported for tuning
            # only; it is not used as a filter.
            circularity = 4 * np.pi * area / (perimeter * perimeter + 1)
            debug_info.append({
                'area': area,
                'aspect_ratio': aspect_ratio,
                'circularity': circularity,
                'vertices': len(poly),
                'bbox': (x, y, bw, bh)
            })

    print(f"🎈 Detected {len(bubbles)} candidate bubbles")
    if debug:
        return bubbles, debug_info
    return bubbles
def merge_overlapping_bubbles(bubbles, iou_threshold=0.3):
    """
    Merge bubbles that overlap significantly.

    Useful when bubble detection creates multiple contours for one bubble.
    Grouping is greedy: each not-yet-used bubble seeds a group, and every
    later bubble whose IoU with that seed exceeds ``iou_threshold`` joins it.
    Bubbles are compared with the seed only, not with the growing group.

    Args:
        bubbles: List of polygons, each a sequence of ``(x, y)`` points.
        iou_threshold: Minimum intersection-over-union for two bubbles to be
            merged.

    Returns:
        List of polygons as lists of ``(x, y)`` coordinates, taken from the
        exterior ring of each resulting shape (holes are dropped). Input with
        at most one bubble is returned unchanged.
    """
    # Nothing to merge; checked before importing shapely so trivial input
    # needs no geometry backend.
    if len(bubbles) <= 1:
        return bubbles

    from shapely.errors import ShapelyError
    from shapely.geometry import Polygon
    from shapely.ops import unary_union

    def _polygon_parts(geom):
        """Yield every non-empty Polygon contained in ``geom``."""
        if geom.is_empty:
            return
        if geom.geom_type == 'Polygon':
            yield geom
        elif hasattr(geom, 'geoms'):
            for part in geom.geoms:
                yield from _polygon_parts(part)

    # Convert to Shapely polygons, skipping input that cannot form one.
    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                p = p.buffer(0)
        except (ValueError, TypeError, ShapelyError):
            continue
        # buffer(0) may return a MultiPolygon or an empty geometry; keep only
        # real polygons so the .exterior access below is always safe.
        shapes.extend(_polygon_parts(p))

    # Group overlapping bubbles.
    merged = []
    used = set()
    for i, shape1 in enumerate(shapes):
        if i in used:
            continue
        group = [shape1]
        used.add(i)
        for j, shape2 in enumerate(shapes[i+1:], start=i+1):
            if j in used:
                continue
            intersection = shape1.intersection(shape2).area
            union = shape1.union(shape2).area
            iou = intersection / union if union > 0 else 0
            if iou > iou_threshold:
                group.append(shape2)
                used.add(j)

        merged_shape = unary_union(group) if len(group) > 1 else group[0]
        # A union can still be several separate regions; emit each of them.
        for part in _polygon_parts(merged_shape):
            merged.append(list(part.exterior.coords)[:-1])

    print(f"🔄 Merged {len(bubbles)} bubbles → {len(merged)} bubbles")
    return merged
def filter_nested_bubbles(bubbles):
    """
    Remove bubbles that are completely inside other bubbles.

    Keeps the outer bubble. When two bubbles cover exactly the same region,
    one of them is kept.

    Args:
        bubbles: List of polygons, each a sequence of ``(x, y)`` points.

    Returns:
        The surviving input polygons, ordered by area (largest first). Input
        with at most one bubble is returned unchanged. Polygons that cannot
        be built are dropped.
    """
    # Nothing can be nested; checked before importing shapely so trivial
    # input needs no geometry backend.
    if len(bubbles) <= 1:
        return bubbles

    from shapely.errors import ShapelyError
    from shapely.geometry import Polygon

    shapes = []
    for b in bubbles:
        try:
            p = Polygon(b)
            if not p.is_valid:
                p = p.buffer(0)
        except (ValueError, TypeError, ShapelyError):
            continue
        shapes.append((p, b))

    # Sort by area (largest first): a container is never smaller than what it
    # contains, so only earlier entries need to be checked.
    shapes.sort(key=lambda x: x[0].area, reverse=True)

    filtered = []
    for i, (shape, poly) in enumerate(shapes):
        # Checking earlier entries only also fixes duplicates: two identical
        # shapes contain each other, and a check against every other shape
        # would drop both of them.
        if any(shapes[j][0].contains(shape) for j in range(i)):
            continue
        filtered.append(poly)

    if len(filtered) < len(bubbles):
        print(f"🗑️ Removed {len(bubbles) - len(filtered)} nested bubbles")
    return filtered
def detect_speech_bubbles_robust(img_pil, min_area=500, merge_overlaps=True, filter_nested=True,
                                 max_area=None, iou_threshold=0.3):
    """
    Robust bubble detection with post-processing.
    This is the recommended function to use.

    Runs ``detect_speech_bubbles`` and then, optionally, merges overlapping
    bubbles and drops bubbles nested inside others.

    Args:
        img_pil: PIL Image
        min_area: Minimum bubble area in pixels
        merge_overlaps: If True, run ``merge_overlapping_bubbles``
        filter_nested: If True, run ``filter_nested_bubbles``
        max_area: Maximum bubble area, forwarded to ``detect_speech_bubbles``
            (None keeps that function's default)
        iou_threshold: Overlap threshold forwarded to
            ``merge_overlapping_bubbles``

    Returns:
        List of bubble polygons; an empty list when nothing is detected.
    """
    # Initial detection
    bubbles = detect_speech_bubbles(img_pil, min_area=min_area, max_area=max_area)
    if not bubbles:
        return []

    # Post-processing
    if merge_overlaps:
        bubbles = merge_overlapping_bubbles(bubbles, iou_threshold=iou_threshold)
    if filter_nested:
        bubbles = filter_nested_bubbles(bubbles)

    print(f"✅ Final: {len(bubbles)} speech bubbles")
    return bubbles