Spaces:
Runtime error
Runtime error
Update utils/bubble_utils.py
Browse files- utils/bubble_utils.py +120 -40
utils/bubble_utils.py
CHANGED
|
@@ -1,20 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
from PIL import Image
|
| 3 |
from utils.image_utils import load_and_split_image
|
| 4 |
from utils.ocr_utils import extract_and_translate_chunk
|
| 5 |
-
from utils.polygon_utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from utils.bubble_detect import detect_speech_bubbles
|
| 7 |
from utils.image_utils import encode_image_to_html
|
| 8 |
|
| 9 |
|
| 10 |
-
def bubble_pipeline_single(file_obj, num_chunks=1):
|
| 11 |
"""
|
| 12 |
-
End-to-end bubble translation pipeline:
|
| 13 |
1. Global bubble detection
|
| 14 |
2. OCR text extraction
|
| 15 |
-
3.
|
| 16 |
-
4. Inpaint + redraw inside
|
| 17 |
-
5.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
# ----------------------------------------------------------------------
|
|
@@ -27,49 +41,55 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
|
|
| 27 |
# 2. Global speech bubble detection
|
| 28 |
# ----------------------------------------------------------------------
|
| 29 |
bubble_polygons = detect_speech_bubbles(full_img)
|
| 30 |
-
|
| 31 |
-
if len(bubble_polygons) == 0:
|
| 32 |
-
print("⚠️ No bubbles detected → FALLBACK to OCR-only pipeline.")
|
| 33 |
-
return fallback_ocr_pipeline(file_obj, num_chunks)
|
| 34 |
|
| 35 |
# ----------------------------------------------------------------------
|
| 36 |
# 3. OCR detection (global)
|
| 37 |
# ----------------------------------------------------------------------
|
| 38 |
translations = extract_and_translate_chunk(full_img)
|
|
|
|
| 39 |
|
| 40 |
if len(translations) == 0:
|
| 41 |
-
print("⚠️ OCR found no text →
|
| 42 |
-
return
|
| 43 |
|
| 44 |
# ----------------------------------------------------------------------
|
| 45 |
-
# 4.
|
| 46 |
# ----------------------------------------------------------------------
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# ----------------------------------------------------------------------
|
| 58 |
# 5. Render onto a working copy of full image
|
| 59 |
# ----------------------------------------------------------------------
|
| 60 |
translated_full = full_img.copy()
|
| 61 |
|
| 62 |
-
for t
|
| 63 |
-
|
|
|
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
translated_full = draw_translated_text_convex(
|
| 69 |
translated_full,
|
| 70 |
-
|
| 71 |
translated_text,
|
| 72 |
-
font_path=FONT_PATH,
|
| 73 |
font_scale=1.0
|
| 74 |
)
|
| 75 |
|
|
@@ -78,13 +98,9 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
|
|
| 78 |
# ----------------------------------------------------------------------
|
| 79 |
if num_chunks > 1:
|
| 80 |
_, _, chunks = load_and_split_image(file_obj, num_chunks)
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
_, _, trans_chunks = load_and_split_image_image_obj(
|
| 85 |
-
translated_full, num_chunks
|
| 86 |
-
)
|
| 87 |
-
translated_chunks = trans_chunks
|
| 88 |
else:
|
| 89 |
chunks = [full_img]
|
| 90 |
translated_chunks = [translated_full]
|
|
@@ -95,18 +111,33 @@ def bubble_pipeline_single(file_obj, num_chunks=1):
|
|
| 95 |
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 96 |
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 97 |
|
| 98 |
-
#
|
| 99 |
table_data = [[t["original"], t["translated"]] for t in translations]
|
| 100 |
|
| 101 |
return filename, orig_html, trans_html, table_data, [translations]
|
| 102 |
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
from utils.polygon_utils import render_translated_chunk
|
| 109 |
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def fallback_ocr_pipeline(file_obj, num_chunks):
|
| 112 |
"""
|
|
@@ -134,3 +165,52 @@ def fallback_ocr_pipeline(file_obj, num_chunks):
|
|
| 134 |
trans = "".join([encode_image_to_html(t) for t in translated_images])
|
| 135 |
|
| 136 |
return filename, orig, trans, all_tables, all_translations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Enhanced bubble detection pipeline with polygon correction
|
| 3 |
+
"""
|
| 4 |
import numpy as np
|
| 5 |
from PIL import Image
|
| 6 |
from utils.image_utils import load_and_split_image
|
| 7 |
from utils.ocr_utils import extract_and_translate_chunk
|
| 8 |
+
from utils.polygon_utils import (
|
| 9 |
+
draw_translated_text_convex,
|
| 10 |
+
shrink_or_expand_polygon,
|
| 11 |
+
FONT_PATH,
|
| 12 |
+
correct_ocr_polygons_with_bubbles,
|
| 13 |
+
render_translated_chunk
|
| 14 |
+
)
|
| 15 |
from utils.bubble_detect import detect_speech_bubbles
|
| 16 |
from utils.image_utils import encode_image_to_html
|
| 17 |
|
| 18 |
|
| 19 |
+
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid"):
|
| 20 |
"""
|
| 21 |
+
End-to-end bubble translation pipeline with polygon correction:
|
| 22 |
1. Global bubble detection
|
| 23 |
2. OCR text extraction
|
| 24 |
+
3. Correct OCR polygons using detected bubbles
|
| 25 |
+
4. Inpaint + redraw inside corrected polygons
|
| 26 |
+
5. Split into chunks if needed
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
file_obj: Input image file
|
| 30 |
+
num_chunks: Number of panels to split into
|
| 31 |
+
polygon_strategy: How to correct polygons ("hybrid", "bubble", "intersect", "expand")
|
| 32 |
"""
|
| 33 |
|
| 34 |
# ----------------------------------------------------------------------
|
|
|
|
| 41 |
# 2. Global speech bubble detection
|
| 42 |
# ----------------------------------------------------------------------
|
| 43 |
bubble_polygons = detect_speech_bubbles(full_img)
|
| 44 |
+
print(f"🔍 Detected {len(bubble_polygons)} speech bubbles")
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# ----------------------------------------------------------------------
|
| 47 |
# 3. OCR detection (global)
|
| 48 |
# ----------------------------------------------------------------------
|
| 49 |
translations = extract_and_translate_chunk(full_img)
|
| 50 |
+
print(f"📝 OCR found {len(translations)} text regions")
|
| 51 |
|
| 52 |
if len(translations) == 0:
|
| 53 |
+
print("⚠️ OCR found no text → showing original image")
|
| 54 |
+
return fallback_empty(file_obj, num_chunks, full_img)
|
| 55 |
|
| 56 |
# ----------------------------------------------------------------------
|
| 57 |
+
# 4. Correct OCR polygons using bubble detection
|
| 58 |
# ----------------------------------------------------------------------
|
| 59 |
+
if len(bubble_polygons) > 0:
|
| 60 |
+
print(f"✨ Correcting OCR polygons using bubble detection (strategy: {polygon_strategy})")
|
| 61 |
+
translations = correct_ocr_polygons_with_bubbles(
|
| 62 |
+
translations,
|
| 63 |
+
bubble_polygons,
|
| 64 |
+
strategy=polygon_strategy
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# Stats
|
| 68 |
+
matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
|
| 69 |
+
print(f"✅ Matched {matched}/{len(translations)} text regions to bubbles")
|
| 70 |
+
else:
|
| 71 |
+
print("⚠️ No bubbles detected → using original OCR polygons")
|
| 72 |
|
| 73 |
# ----------------------------------------------------------------------
|
| 74 |
# 5. Render onto a working copy of full image
|
| 75 |
# ----------------------------------------------------------------------
|
| 76 |
translated_full = full_img.copy()
|
| 77 |
|
| 78 |
+
for t in translations:
|
| 79 |
+
polygon = t.get("polygon")
|
| 80 |
+
translated_text = t.get("translated", "")
|
| 81 |
|
| 82 |
+
if not polygon or not translated_text:
|
| 83 |
+
continue
|
| 84 |
+
|
| 85 |
+
# Slightly shrink for better visual appearance
|
| 86 |
+
render_poly = shrink_or_expand_polygon(polygon, shrink_ratio=0.92)
|
| 87 |
|
| 88 |
translated_full = draw_translated_text_convex(
|
| 89 |
translated_full,
|
| 90 |
+
render_poly,
|
| 91 |
translated_text,
|
| 92 |
+
font_path=FONT_PATH,
|
| 93 |
font_scale=1.0
|
| 94 |
)
|
| 95 |
|
|
|
|
| 98 |
# ----------------------------------------------------------------------
|
| 99 |
if num_chunks > 1:
|
| 100 |
_, _, chunks = load_and_split_image(file_obj, num_chunks)
|
| 101 |
+
|
| 102 |
+
# Split translated image the same way
|
| 103 |
+
translated_chunks = split_image_into_chunks(translated_full, num_chunks)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
else:
|
| 105 |
chunks = [full_img]
|
| 106 |
translated_chunks = [translated_full]
|
|
|
|
| 111 |
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 112 |
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 113 |
|
| 114 |
+
# Table for manual edit
|
| 115 |
table_data = [[t["original"], t["translated"]] for t in translations]
|
| 116 |
|
| 117 |
return filename, orig_html, trans_html, table_data, [translations]
|
| 118 |
|
| 119 |
|
| 120 |
+
def split_image_into_chunks(img, num_chunks):
    """Cut an image into ``num_chunks`` horizontal bands, top to bottom.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, top, right, bottom))`` method (e.g. a PIL Image).
        num_chunks: Number of bands to produce.

    Returns:
        A list of cropped bands in top-to-bottom order. When ``num_chunks`` is
        1 or less, the list holds the input object itself (not a copy).
    """
    if num_chunks <= 1:
        return [img]

    width, height = img.size
    band = height // num_chunks

    # Cut positions along the vertical axis. The final cut is pinned to the
    # full height so the last band absorbs any rows left over by the floor
    # division instead of dropping them.
    cuts = [index * band for index in range(num_chunks)] + [height]

    return [
        img.crop((0, upper, width, lower))
        for upper, lower in zip(cuts, cuts[1:])
    ]
|
| 136 |
|
|
|
|
| 137 |
|
| 138 |
+
# ========================================================================
|
| 139 |
+
# Fallback Pipelines
|
| 140 |
+
# ========================================================================
|
| 141 |
|
| 142 |
def fallback_ocr_pipeline(file_obj, num_chunks):
|
| 143 |
"""
|
|
|
|
| 165 |
trans = "".join([encode_image_to_html(t) for t in translated_images])
|
| 166 |
|
| 167 |
return filename, orig, trans, all_tables, all_translations
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def fallback_empty(file_obj, num_chunks, full_img):
    """Build the pipeline result for an image in which no text was detected.

    The input is loaded and split via ``load_and_split_image`` and every
    resulting chunk is rendered with ``encode_image_to_html``. Because there
    is nothing to translate, the same HTML is returned for both the original
    and the "translated" view.

    Args:
        file_obj: Input image file, forwarded to ``load_and_split_image``.
        num_chunks: Number of chunks, forwarded to ``load_and_split_image``.
        full_img: Accepted for the caller's convenience; not used here.

    Returns:
        A 5-tuple ``(filename, orig_html, trans_html, table_data,
        translations)`` where ``trans_html`` equals ``orig_html``,
        ``table_data`` is ``[]`` and ``translations`` is ``[[]]``.
    """
    filename, _, panels = load_and_split_image(file_obj, num_chunks)

    # One HTML string serves both views: there is no translated rendering.
    html = "".join(encode_image_to_html(panel) for panel in panels)

    return filename, html, html, [], [[]]
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
# ========================================================================
|
| 181 |
+
# Debug/Visualization Utilities
|
| 182 |
+
# ========================================================================
|
| 183 |
+
|
| 184 |
+
def visualize_polygon_correction(img, translations, bubble_polygons, output_path=None):
    """Render a debug overlay comparing OCR polygons with detected bubbles.

    Outlines drawn on a copy of ``img``:
      - detected bubbles in blue,
      - original OCR polygons (``"original_polygon"`` key) in red,
      - corrected polygons (``"polygon"`` key) in green.

    Args:
        img: PIL Image to annotate; it is never modified in place.
        translations: Iterable of dicts; the ``"original_polygon"`` and
            ``"polygon"`` entries are read with ``.get`` and skipped when
            missing or falsy.
        bubble_polygons: Iterable of polygons passed straight to
            ``ImageDraw.polygon``.
        output_path: Optional path; when truthy the overlay is saved there.

    Returns:
        The annotated image. Inputs whose mode is neither RGB nor RGBA are
        returned converted to RGB.
    """
    from PIL import ImageDraw

    # ImageDraw only accepts the 'RGBA' drawing mode on RGB (blended) or RGBA
    # images; any other mode (e.g. 'L', 'P') raises "mode mismatch". Convert
    # those up front. convert() returns a new image, so the caller's image is
    # left untouched in both branches.
    if img.mode in ("RGB", "RGBA"):
        debug_img = img.copy()
    else:
        debug_img = img.convert("RGB")

    draw = ImageDraw.Draw(debug_img, 'RGBA')

    # Detected bubbles in blue
    for bubble in bubble_polygons:
        draw.polygon(bubble, outline=(0, 0, 255, 128), width=2)

    # OCR polygons: original in red, corrected in green (drawn last and
    # thicker so it stays visible where the two overlap)
    for t in translations:
        orig_poly = t.get("original_polygon")
        corrected_poly = t.get("polygon")

        if orig_poly:
            draw.polygon(orig_poly, outline=(255, 0, 0, 128), width=2)

        if corrected_poly:
            draw.polygon(corrected_poly, outline=(0, 255, 0, 192), width=3)

    if output_path:
        debug_img.save(output_path)

    return debug_img
|