Spaces:
Sleeping
Sleeping
Update utils/bubble_utils.py
Browse files- utils/bubble_utils.py +43 -70
utils/bubble_utils.py
CHANGED
|
@@ -15,7 +15,7 @@ from utils.polygon_utils import (
|
|
| 15 |
)
|
| 16 |
from utils.bubble_detect import detect_speech_bubbles_robust
|
| 17 |
from utils.u2net_detector import detect_bubbles_u2net
|
| 18 |
-
from utils.bubble_detect_rtdetr import
|
| 19 |
|
| 20 |
def bbox_to_polygon(bbox):
|
| 21 |
"""
|
|
@@ -78,124 +78,97 @@ def visualize_all_debug(img, translations, bubble_polygons, step_name="debug", p
|
|
| 78 |
# ===================== Main Bubble Translation Pipeline ===================
|
| 79 |
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
|
| 80 |
"""
|
| 81 |
-
|
| 82 |
-
1. Detect speech bubbles
|
| 83 |
-
2. OCR full page
|
| 84 |
-
3. Correct OCR polygons using bubble polygons
|
| 85 |
-
4. Render translated text using corrected polygons
|
| 86 |
-
5. Split into chunks
|
| 87 |
"""
|
|
|
|
| 88 |
# -------------------------------------------------------
|
| 89 |
-
# 1) Load image
|
| 90 |
# -------------------------------------------------------
|
| 91 |
filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
|
| 92 |
-
print(f"π bubble_pipeline_single:
|
| 93 |
|
| 94 |
# -------------------------------------------------------
|
| 95 |
-
# 2)
|
| 96 |
# -------------------------------------------------------
|
| 97 |
-
detections = detect_bubbles_rtdetr(full_img)
|
| 98 |
-
|
| 99 |
-
bubble_boxes = [d["bbox"] for d in detections if d["class"] == 0] # pure bubble shapes
|
| 100 |
-
bubble_text_boxes = [d["bbox"] for d in detections if d["class"] == 1] # text inside bubble
|
| 101 |
-
free_text_boxes = [d["bbox"] for d in detections if d["class"] == 2] # text outside bubble
|
| 102 |
|
| 103 |
-
|
| 104 |
-
bubble_polygons = [bbox_to_polygon(b) for b in bubble_boxes]
|
| 105 |
-
|
| 106 |
-
print(f"π RT-DETR: {len(bubble_polygons)} bubbles, "
|
| 107 |
-
f"{len(bubble_text_boxes)} bubble-text regions, "
|
| 108 |
-
f"{len(free_text_boxes)} free-text regions")
|
| 109 |
-
|
| 110 |
|
| 111 |
if debug:
|
| 112 |
-
visualize_all_debug(
|
| 113 |
-
|
| 114 |
-
|
|
|
|
| 115 |
|
| 116 |
# -------------------------------------------------------
|
| 117 |
-
# 3) OCR
|
| 118 |
# -------------------------------------------------------
|
| 119 |
translations = extract_and_translate_chunk(full_img)
|
| 120 |
-
print(f"π OCR found {len(translations)}
|
| 121 |
|
| 122 |
-
if
|
| 123 |
-
print("β οΈ No OCR text detected β fallback")
|
| 124 |
return fallback_empty(file_obj, num_chunks, full_img)
|
| 125 |
|
| 126 |
-
#
|
| 127 |
for t in translations:
|
| 128 |
-
|
| 129 |
-
t["original_polygon"] = t["polygon"]
|
| 130 |
|
| 131 |
# -------------------------------------------------------
|
| 132 |
-
# 4)
|
| 133 |
# -------------------------------------------------------
|
| 134 |
-
if
|
| 135 |
-
print(
|
|
|
|
| 136 |
translations = correct_ocr_polygons_with_bubbles(
|
| 137 |
-
translations,
|
| 138 |
-
bubble_polygons,
|
| 139 |
-
strategy=polygon_strategy,
|
| 140 |
)
|
|
|
|
| 141 |
matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
|
| 142 |
-
print(f"β
|
| 143 |
-
else:
|
| 144 |
-
print("β οΈ No bubble polygons detected β skipping polygon correction")
|
| 145 |
|
| 146 |
if debug:
|
| 147 |
-
visualize_all_debug(
|
| 148 |
-
|
| 149 |
-
|
|
|
|
| 150 |
|
| 151 |
# -------------------------------------------------------
|
| 152 |
-
# 5) Render translated text
|
| 153 |
# -------------------------------------------------------
|
| 154 |
translated_full = full_img.copy()
|
| 155 |
|
| 156 |
for t in translations:
|
| 157 |
-
|
| 158 |
-
original_poly = t.get("original_polygon")
|
| 159 |
translated_text = t.get("translated", "")
|
| 160 |
-
|
| 161 |
-
if not corrected_poly or not translated_text:
|
| 162 |
continue
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
bubble_poly = bubble_polygons[idx]
|
| 169 |
-
|
| 170 |
-
# Render polygon is slightly shrunk
|
| 171 |
-
render_poly = shrink_or_expand_polygon(corrected_poly, shrink_ratio=0.92)
|
| 172 |
|
| 173 |
translated_full = draw_translated_text_convex(
|
| 174 |
translated_full,
|
| 175 |
-
polygon_coords=
|
| 176 |
text=translated_text,
|
| 177 |
font_path=FONT_PATH,
|
| 178 |
font_scale=1.0,
|
| 179 |
-
original_polygon=
|
| 180 |
-
bubble_polygon=
|
| 181 |
)
|
| 182 |
|
| 183 |
# -------------------------------------------------------
|
| 184 |
# 6) Split for UI
|
| 185 |
# -------------------------------------------------------
|
| 186 |
if num_chunks > 1:
|
| 187 |
-
_, _,
|
| 188 |
-
|
| 189 |
else:
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
# -------------------------------------------------------
|
| 194 |
-
# 7) Return output
|
| 195 |
-
# -------------------------------------------------------
|
| 196 |
-
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 197 |
-
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 198 |
|
|
|
|
|
|
|
| 199 |
table_data = [[t["original"], t["translated"]] for t in translations]
|
| 200 |
|
| 201 |
return filename, orig_html, trans_html, table_data, [translations]
|
|
|
|
| 15 |
)
|
| 16 |
from utils.bubble_detect import detect_speech_bubbles_robust
|
| 17 |
from utils.u2net_detector import detect_bubbles_u2net
|
| 18 |
+
from utils.bubble_detect_rtdetr import detect_and_refine_bubbles
|
| 19 |
|
| 20 |
def bbox_to_polygon(bbox):
|
| 21 |
"""
|
|
|
|
| 78 |
# ===================== Main Bubble Translation Pipeline ===================
|
| 79 |
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
|
| 80 |
"""
|
| 81 |
+
Manga bubble-aware translation pipeline using RT-DETR bubble detection.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
"""
|
| 83 |
+
|
| 84 |
# -------------------------------------------------------
|
| 85 |
+
# 1) Load full image
|
| 86 |
# -------------------------------------------------------
|
| 87 |
filename, full_img, _ = load_and_split_image(file_obj, num_chunks=1)
|
| 88 |
+
print(f"π bubble_pipeline_single: {filename}, size={full_img.size}")
|
| 89 |
|
| 90 |
# -------------------------------------------------------
|
| 91 |
+
# 2) Detect & refine bubbles with RT-DETR
|
| 92 |
# -------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
+
bubble_polygons, interior_polygons = detect_and_refine_bubbles(full_img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
if debug:
|
| 97 |
+
visualize_all_debug(
|
| 98 |
+
full_img, [], bubble_polygons,
|
| 99 |
+
step_name="bubbles_only", prefix="bubble_dbg"
|
| 100 |
+
)
|
| 101 |
|
| 102 |
# -------------------------------------------------------
|
| 103 |
+
# 3) OCR full-page
|
| 104 |
# -------------------------------------------------------
|
| 105 |
translations = extract_and_translate_chunk(full_img)
|
| 106 |
+
print(f"π OCR found {len(translations)} regions")
|
| 107 |
|
| 108 |
+
if not translations:
|
|
|
|
| 109 |
return fallback_empty(file_obj, num_chunks, full_img)
|
| 110 |
|
| 111 |
+
# save original polygon for visualization
|
| 112 |
for t in translations:
|
| 113 |
+
t["original_polygon"] = t.get("polygon")
|
|
|
|
| 114 |
|
| 115 |
# -------------------------------------------------------
|
| 116 |
+
# 4) Match OCR text regions with refined bubble polygons
|
| 117 |
# -------------------------------------------------------
|
| 118 |
+
if bubble_polygons:
|
| 119 |
+
print("✨ Correcting OCR polygons using refined bubbles...")
|
| 120 |
+
|
| 121 |
translations = correct_ocr_polygons_with_bubbles(
|
| 122 |
+
translations, bubble_polygons, strategy=polygon_strategy
|
|
|
|
|
|
|
| 123 |
)
|
| 124 |
+
|
| 125 |
matched = sum(1 for t in translations if t.get("matched_bubble_idx") is not None)
|
| 126 |
+
print(f"β
Polygons matched to bubbles: {matched}/{len(translations)}")
|
|
|
|
|
|
|
| 127 |
|
| 128 |
if debug:
|
| 129 |
+
visualize_all_debug(
|
| 130 |
+
full_img, translations, bubble_polygons,
|
| 131 |
+
step_name="after_correction", prefix="bubble_dbg"
|
| 132 |
+
)
|
| 133 |
|
| 134 |
# -------------------------------------------------------
|
| 135 |
+
# 5) Render translated text using INTERIOR polygons
|
| 136 |
# -------------------------------------------------------
|
| 137 |
translated_full = full_img.copy()
|
| 138 |
|
| 139 |
for t in translations:
|
| 140 |
+
idx = t.get("matched_bubble_idx")
|
|
|
|
| 141 |
translated_text = t.get("translated", "")
|
| 142 |
+
if not translated_text:
|
|
|
|
| 143 |
continue
|
| 144 |
|
| 145 |
+
if idx is not None and idx < len(interior_polygons):
|
| 146 |
+
render_poly = interior_polygons[idx] # β refined interior polygon
|
| 147 |
+
else:
|
| 148 |
+
render_poly = shrink_or_expand_polygon(t["polygon"], shrink_ratio=0.92)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
translated_full = draw_translated_text_convex(
|
| 151 |
translated_full,
|
| 152 |
+
polygon_coords=render_poly,
|
| 153 |
text=translated_text,
|
| 154 |
font_path=FONT_PATH,
|
| 155 |
font_scale=1.0,
|
| 156 |
+
original_polygon=t.get("original_polygon"),
|
| 157 |
+
bubble_polygon=bubble_polygons[idx] if idx is not None else None
|
| 158 |
)
|
| 159 |
|
| 160 |
# -------------------------------------------------------
|
| 161 |
# 6) Split for UI
|
| 162 |
# -------------------------------------------------------
|
| 163 |
if num_chunks > 1:
|
| 164 |
+
_, _, orig_chunks = load_and_split_image(file_obj, num_chunks)
|
| 165 |
+
trans_chunks = split_image_into_chunks(translated_full, num_chunks)
|
| 166 |
else:
|
| 167 |
+
orig_chunks = [full_img]
|
| 168 |
+
trans_chunks = [translated_full]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
orig_html = "".join([encode_image_to_html(c) for c in orig_chunks])
|
| 171 |
+
trans_html = "".join([encode_image_to_html(c) for c in trans_chunks])
|
| 172 |
table_data = [[t["original"], t["translated"]] for t in translations]
|
| 173 |
|
| 174 |
return filename, orig_html, trans_html, table_data, [translations]
|