Spaces:
Runtime error
Runtime error
Update utils/bubble_utils.py
Browse files- utils/bubble_utils.py +54 -29
utils/bubble_utils.py
CHANGED
|
@@ -62,44 +62,52 @@ def visualize_all_debug(img, translations, bubble_polygons, step_name="debug", p
|
|
| 62 |
|
| 63 |
|
| 64 |
# ===================== Main Bubble Translation Pipeline ===================
|
| 65 |
-
|
| 66 |
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
|
| 67 |
"""
|
| 68 |
End-to-end bubble translation pipeline:
|
| 69 |
-
1.
|
| 70 |
-
2. OCR
|
| 71 |
-
3. Correct OCR polygons using
|
| 72 |
-
4.
|
| 73 |
-
5. Split into chunks
|
| 74 |
-
|
| 75 |
-
Args:
|
| 76 |
-
file_obj: Uploaded file object or path
|
| 77 |
-
num_chunks: #chunks for UI display
|
| 78 |
-
polygon_strategy: "hybrid", "bubble", "intersect", "expand"
|
| 79 |
-
debug: if True, saves debug overlay PNGs
|
| 80 |
"""
|
| 81 |
-
#
|
|
|
|
|
|
|
| 82 |
filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
|
| 83 |
print(f"π bubble_pipeline_single: filename={filename}, size={full_img.size}")
|
| 84 |
|
|
|
|
| 85 |
# 2) Robust bubble detection
|
|
|
|
| 86 |
bubble_polygons = detect_speech_bubbles_robust(full_img, min_area=400)
|
| 87 |
-
print(f"π Detected {len(bubble_polygons)} speech bubbles
|
| 88 |
|
| 89 |
if debug:
|
| 90 |
-
visualize_all_debug(full_img, [], bubble_polygons,
|
|
|
|
|
|
|
| 91 |
|
|
|
|
| 92 |
# 3) OCR globally
|
|
|
|
| 93 |
translations = extract_and_translate_chunk(full_img)
|
| 94 |
print(f"π OCR found {len(translations)} text regions")
|
| 95 |
|
| 96 |
if len(translations) == 0:
|
| 97 |
-
print("β οΈ OCR
|
| 98 |
return fallback_empty(file_obj, num_chunks, full_img)
|
| 99 |
|
| 100 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
if len(bubble_polygons) > 0:
|
| 102 |
-
print(f"β¨ Correcting OCR polygons using
|
| 103 |
translations = correct_ocr_polygons_with_bubbles(
|
| 104 |
translations,
|
| 105 |
bubble_polygons,
|
|
@@ -108,32 +116,48 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
|
|
| 108 |
matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
|
| 109 |
print(f"✅ Matched {matched}/{len(translations)} OCR regions to bubbles")
|
| 110 |
else:
|
| 111 |
-
print("β οΈ No
|
| 112 |
|
| 113 |
if debug:
|
| 114 |
-
visualize_all_debug(full_img, translations, bubble_polygons,
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
#
|
|
|
|
|
|
|
| 117 |
translated_full = full_img.copy()
|
| 118 |
|
| 119 |
for t in translations:
|
| 120 |
-
|
|
|
|
| 121 |
translated_text = t.get("translated", "")
|
| 122 |
|
| 123 |
-
if not
|
| 124 |
continue
|
| 125 |
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
translated_full = draw_translated_text_convex(
|
| 129 |
translated_full,
|
| 130 |
-
|
| 131 |
-
translated_text,
|
| 132 |
font_path=FONT_PATH,
|
| 133 |
font_scale=1.0,
|
|
|
|
|
|
|
| 134 |
)
|
| 135 |
|
| 136 |
-
#
|
|
|
|
|
|
|
| 137 |
if num_chunks > 1:
|
| 138 |
_, _, chunks = load_and_split_image(file_obj, num_chunks)
|
| 139 |
translated_chunks = split_image_into_chunks(translated_full, num_chunks)
|
|
@@ -141,7 +165,9 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
|
|
| 141 |
chunks = [full_img]
|
| 142 |
translated_chunks = [translated_full]
|
| 143 |
|
| 144 |
-
#
|
|
|
|
|
|
|
| 145 |
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 146 |
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 147 |
|
|
@@ -149,7 +175,6 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
|
|
| 149 |
|
| 150 |
return filename, orig_html, trans_html, table_data, [translations]
|
| 151 |
|
| 152 |
-
|
| 153 |
def split_image_into_chunks(img, num_chunks):
|
| 154 |
"""
|
| 155 |
Simple vertical splitting for the translated image.
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
# ===================== Main Bubble Translation Pipeline ===================
|
|
|
|
| 65 |
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
|
| 66 |
"""
|
| 67 |
End-to-end bubble translation pipeline:
|
| 68 |
+
1. Detect speech bubbles
|
| 69 |
+
2. OCR full page
|
| 70 |
+
3. Correct OCR polygons using bubble polygons
|
| 71 |
+
4. Render translated text using corrected polygons
|
| 72 |
+
5. Split into chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
"""
|
| 74 |
+
# -------------------------------------------------------
|
| 75 |
+
# 1) Load image
|
| 76 |
+
# -------------------------------------------------------
|
| 77 |
filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
|
| 78 |
print(f"π bubble_pipeline_single: filename={filename}, size={full_img.size}")
|
| 79 |
|
| 80 |
+
# -------------------------------------------------------
|
| 81 |
# 2) Robust bubble detection
|
| 82 |
+
# -------------------------------------------------------
|
| 83 |
bubble_polygons = detect_speech_bubbles_robust(full_img, min_area=400)
|
| 84 |
+
print(f"π Detected {len(bubble_polygons)} speech bubbles")
|
| 85 |
|
| 86 |
if debug:
|
| 87 |
+
visualize_all_debug(full_img, [], bubble_polygons,
|
| 88 |
+
step_name="bubbles_only",
|
| 89 |
+
prefix="bubble_dbg")
|
| 90 |
|
| 91 |
+
# -------------------------------------------------------
|
| 92 |
# 3) OCR globally
|
| 93 |
+
# -------------------------------------------------------
|
| 94 |
translations = extract_and_translate_chunk(full_img)
|
| 95 |
print(f"π OCR found {len(translations)} text regions")
|
| 96 |
|
| 97 |
if len(translations) == 0:
|
| 98 |
+
print("β οΈ No OCR text detected β fallback")
|
| 99 |
return fallback_empty(file_obj, num_chunks, full_img)
|
| 100 |
|
| 101 |
+
# SAVE ORIGINAL POLYGONS for debugging
|
| 102 |
+
for t in translations:
|
| 103 |
+
if "polygon" in t:
|
| 104 |
+
t["original_polygon"] = t["polygon"]
|
| 105 |
+
|
| 106 |
+
# -------------------------------------------------------
|
| 107 |
+
# 4) Correct OCR polygons using bubble polygons
|
| 108 |
+
# -------------------------------------------------------
|
| 109 |
if len(bubble_polygons) > 0:
|
| 110 |
+
print(f"✨ Correcting OCR polygons using bubble strategy: {polygon_strategy}")
|
| 111 |
translations = correct_ocr_polygons_with_bubbles(
|
| 112 |
translations,
|
| 113 |
bubble_polygons,
|
|
|
|
| 116 |
matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
|
| 117 |
print(f"✅ Matched {matched}/{len(translations)} OCR regions to bubbles")
|
| 118 |
else:
|
| 119 |
+
print("β οΈ No bubble polygons detected β skipping polygon correction")
|
| 120 |
|
| 121 |
if debug:
|
| 122 |
+
visualize_all_debug(full_img, translations, bubble_polygons,
|
| 123 |
+
step_name="after_correction",
|
| 124 |
+
prefix="bubble_dbg")
|
| 125 |
|
| 126 |
+
# -------------------------------------------------------
|
| 127 |
+
# 5) Render translated text
|
| 128 |
+
# -------------------------------------------------------
|
| 129 |
translated_full = full_img.copy()
|
| 130 |
|
| 131 |
for t in translations:
|
| 132 |
+
corrected_poly = t.get("polygon")
|
| 133 |
+
original_poly = t.get("original_polygon")
|
| 134 |
translated_text = t.get("translated", "")
|
| 135 |
|
| 136 |
+
if not corrected_poly or not translated_text:
|
| 137 |
continue
|
| 138 |
|
| 139 |
+
# Get bubble polygon (if matched)
|
| 140 |
+
bubble_poly = None
|
| 141 |
+
idx = t.get("matched_bubble_idx")
|
| 142 |
+
if idx is not None and 0 <= idx < len(bubble_polygons):
|
| 143 |
+
bubble_poly = bubble_polygons[idx]
|
| 144 |
+
|
| 145 |
+
# Render polygon is slightly shrunk
|
| 146 |
+
render_poly = shrink_or_expand_polygon(corrected_poly, shrink_ratio=0.92)
|
| 147 |
|
| 148 |
translated_full = draw_translated_text_convex(
|
| 149 |
translated_full,
|
| 150 |
+
polygon_coords=corrected_poly, # corrected
|
| 151 |
+
text=translated_text,
|
| 152 |
font_path=FONT_PATH,
|
| 153 |
font_scale=1.0,
|
| 154 |
+
original_polygon=original_poly, # RED
|
| 155 |
+
bubble_polygon=bubble_poly # BLUE
|
| 156 |
)
|
| 157 |
|
| 158 |
+
# -------------------------------------------------------
|
| 159 |
+
# 6) Split for UI
|
| 160 |
+
# -------------------------------------------------------
|
| 161 |
if num_chunks > 1:
|
| 162 |
_, _, chunks = load_and_split_image(file_obj, num_chunks)
|
| 163 |
translated_chunks = split_image_into_chunks(translated_full, num_chunks)
|
|
|
|
| 165 |
chunks = [full_img]
|
| 166 |
translated_chunks = [translated_full]
|
| 167 |
|
| 168 |
+
# -------------------------------------------------------
|
| 169 |
+
# 7) Return output
|
| 170 |
+
# -------------------------------------------------------
|
| 171 |
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 172 |
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 173 |
|
|
|
|
| 175 |
|
| 176 |
return filename, orig_html, trans_html, table_data, [translations]
|
| 177 |
|
|
|
|
| 178 |
def split_image_into_chunks(img, num_chunks):
|
| 179 |
"""
|
| 180 |
Simple vertical splitting for the translated image.
|