Spaces:
Runtime error
Runtime error
Update utils/bubble_utils.py
Browse files- utils/bubble_utils.py +58 -62
utils/bubble_utils.py
CHANGED
|
@@ -177,129 +177,124 @@ def visualize_all_debug(
|
|
| 177 |
|
| 178 |
return out_path
|
| 179 |
|
| 180 |
-
|
|
|
|
| 181 |
# ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
|
| 182 |
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
|
| 183 |
"""
|
| 184 |
Manga bubble-aware translation pipeline (CHUNK-BASED):
|
| 185 |
-
- Split image into
|
| 186 |
- For each chunk:
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
| 195 |
"""
|
| 196 |
|
| 197 |
debug_files = []
|
| 198 |
|
| 199 |
# -------------------------------------------------------
|
| 200 |
-
# 1. Load & split image into chunks
|
| 201 |
# -------------------------------------------------------
|
| 202 |
filename, full_img, chunks = load_and_split_image(file_obj, num_chunks)
|
| 203 |
print(f"π bubble_pipeline_single: {filename}, full size={full_img.size}, chunks={len(chunks)}")
|
| 204 |
|
| 205 |
-
all_translations = []
|
| 206 |
-
all_tables = []
|
| 207 |
-
translated_chunks = []
|
| 208 |
-
|
| 209 |
-
y_offset = 0 # track position of each chunk in the full image
|
| 210 |
|
| 211 |
# -------------------------------------------------------
|
| 212 |
# 2. Process each chunk independently
|
| 213 |
# -------------------------------------------------------
|
| 214 |
for ci, chunk in enumerate(chunks):
|
| 215 |
print(f"\n================ CHUNK {ci} ================")
|
| 216 |
-
|
|
|
|
| 217 |
|
| 218 |
-
# ---- A) Detect &
|
| 219 |
bubble_polygons, interior_polygons, bubble_boxes = detect_and_refine_bubbles(chunk)
|
| 220 |
print(f"π Chunk {ci}: found {len(bubble_polygons)} bubble polygons")
|
| 221 |
|
| 222 |
-
#
|
| 223 |
-
bubble_polygons = [ [(x, y + y_offset) for (x, y) in poly] for poly in bubble_polygons ]
|
| 224 |
-
interior_polygons = [ [(x, y + y_offset) for (x, y) in poly] for poly in interior_polygons ]
|
| 225 |
-
bubble_boxes = [ (x1, y1 + y_offset, x2, y2 + y_offset) for (x1,y1,x2,y2) in bubble_boxes ]
|
| 226 |
-
|
| 227 |
-
# ---- Debug plot: Bubble shapes only ----
|
| 228 |
if debug:
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
debug_img, [], bubble_polygons, bubble_boxes=bubble_boxes,
|
| 232 |
step_name=f"chunk{ci}_bubbles", prefix="bubble_dbg"
|
| 233 |
)
|
| 234 |
-
debug_files.append(
|
| 235 |
|
| 236 |
-
# ---- B) Mask-based OCR
|
| 237 |
-
print(f"π Chunk {ci}: masked OCR...")
|
| 238 |
-
translations = extract_and_translate_with_masks(
|
| 239 |
|
|
|
|
| 240 |
if not translations:
|
| 241 |
-
print(f"⚠️ Chunk {ci}:
|
| 242 |
translations = extract_and_translate_chunk(chunk)
|
| 243 |
|
| 244 |
-
# add back offset for OCR polygons
|
| 245 |
-
for t in translations:
|
| 246 |
-
if t["polygon"]:
|
| 247 |
-
t["polygon"] = [(x, y + y_offset) for (x, y) in t["polygon"]]
|
| 248 |
-
|
| 249 |
print(f"β¬οΈ Chunk {ci}: OCR detections = {len(translations)}")
|
| 250 |
|
| 251 |
-
#
|
| 252 |
for t in translations:
|
| 253 |
t["original_polygon"] = t.get("polygon")
|
| 254 |
|
| 255 |
-
# ---- C) Bubble-aware
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
-
# Debug: after
|
| 259 |
if debug:
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
debug_img2, translations, bubble_polygons, bubble_boxes=bubble_boxes,
|
| 263 |
step_name=f"chunk{ci}_after_correction", prefix="bubble_dbg"
|
| 264 |
)
|
| 265 |
-
debug_files.append(
|
| 266 |
|
| 267 |
-
# ---- D) Render translated text
|
| 268 |
-
translated_chunk_img =
|
| 269 |
|
| 270 |
for t in translations:
|
| 271 |
-
|
| 272 |
-
if not
|
| 273 |
continue
|
| 274 |
|
| 275 |
bidx = t.get("matched_bubble_idx")
|
| 276 |
|
| 277 |
-
|
|
|
|
| 278 |
render_poly = bubble_polygons[bidx]
|
| 279 |
else:
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
translated_chunk_img = draw_translated_text_convex(
|
| 283 |
translated_chunk_img,
|
| 284 |
polygon_coords=render_poly,
|
| 285 |
-
text=
|
| 286 |
font_path=FONT_PATH,
|
| 287 |
font_scale=1.0,
|
| 288 |
original_polygon=t.get("original_polygon"),
|
| 289 |
-
bubble_polygon=bubble_polygons[bidx] if bidx is not None else None
|
| 290 |
)
|
| 291 |
|
| 292 |
-
#
|
| 293 |
-
all_translations.extend(translations)
|
| 294 |
-
|
| 295 |
-
# Save to table
|
| 296 |
-
for t in translations:
|
| 297 |
-
all_tables.append([t["original"], t["translated"]])
|
| 298 |
-
|
| 299 |
translated_chunks.append(translated_chunk_img)
|
| 300 |
|
| 301 |
-
|
| 302 |
-
|
|
|
|
| 303 |
|
| 304 |
# -------------------------------------------------------
|
| 305 |
# 3. Assemble HTML output (original & translated)
|
|
@@ -307,6 +302,7 @@ def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", de
|
|
| 307 |
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 308 |
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 309 |
|
|
|
|
| 310 |
return filename, orig_html, trans_html, all_tables, [all_translations], debug_files
|
| 311 |
|
| 312 |
def split_image_into_chunks(img, num_chunks):
|
|
|
|
| 177 |
|
| 178 |
return out_path
|
| 179 |
|
| 180 |
+
|
| 181 |
+
# ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
|
| 182 |
# ===================== Main Bubble Translation Pipeline (Chunk-Based) ===================
|
| 183 |
def bubble_pipeline_single(file_obj, num_chunks=1, polygon_strategy="hybrid", debug=True):
|
| 184 |
"""
|
| 185 |
Manga bubble-aware translation pipeline (CHUNK-BASED):
|
| 186 |
+
- Split image into vertical chunks
|
| 187 |
- For each chunk:
|
| 188 |
+
* RT-DETR bubble detection
|
| 189 |
+
* Bubble shape refinement (outer + inner polygons)
|
| 190 |
+
* Mask-based OCR inside bubble interiors
|
| 191 |
+
* Bubble-aware polygon matching
|
| 192 |
+
* Render translated text inside bubble regions
|
| 193 |
+
- Return concatenated HTML previews + table + debug files
|
| 194 |
+
|
| 195 |
+
NOTE:
|
| 196 |
+
* All polygons are kept in CHUNK-LOCAL coordinates.
|
| 197 |
+
* Manual-edit pipeline isn't wired for Bubble mode, so this is fine.
|
| 198 |
"""
|
| 199 |
|
| 200 |
debug_files = []
|
| 201 |
|
| 202 |
# -------------------------------------------------------
|
| 203 |
+
# 1. Load & split image into chunks (vertical)
|
| 204 |
# -------------------------------------------------------
|
| 205 |
filename, full_img, chunks = load_and_split_image(file_obj, num_chunks)
|
| 206 |
print(f"π bubble_pipeline_single: {filename}, full size={full_img.size}, chunks={len(chunks)}")
|
| 207 |
|
| 208 |
+
all_translations = [] # flattened list across chunks
|
| 209 |
+
all_tables = [] # [["original", "translated"], ...]
|
| 210 |
+
translated_chunks = [] # list of PIL images (per chunk)
|
|
|
|
|
|
|
| 211 |
|
| 212 |
# -------------------------------------------------------
|
| 213 |
# 2. Process each chunk independently
|
| 214 |
# -------------------------------------------------------
|
| 215 |
for ci, chunk in enumerate(chunks):
|
| 216 |
print(f"\n================ CHUNK {ci} ================")
|
| 217 |
+
cw, ch = chunk.size
|
| 218 |
+
print(f" Chunk size: {cw}x{ch}")
|
| 219 |
|
| 220 |
+
# ---- A) Detect & refine bubbles in THIS chunk (local coords) ----
|
| 221 |
bubble_polygons, interior_polygons, bubble_boxes = detect_and_refine_bubbles(chunk)
|
| 222 |
print(f"π Chunk {ci}: found {len(bubble_polygons)} bubble polygons")
|
| 223 |
|
| 224 |
+
# Debug: bubble shapes & boxes on the chunk image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
if debug:
|
| 226 |
+
dbg_path = visualize_all_debug(
|
| 227 |
+
chunk, [], bubble_polygons, bubble_boxes=bubble_boxes,
|
|
|
|
| 228 |
step_name=f"chunk{ci}_bubbles", prefix="bubble_dbg"
|
| 229 |
)
|
| 230 |
+
debug_files.append(dbg_path)
|
| 231 |
|
| 232 |
+
# ---- B) Mask-based OCR INSIDE bubble interiors (on the chunk) ----
|
| 233 |
+
print(f"π Chunk {ci}: masked OCR inside bubble interiors...")
|
| 234 |
+
translations = extract_and_translate_with_masks(chunk, interior_polygons)
|
| 235 |
|
| 236 |
+
# Fallback: if masked OCR failed, run full OCR on the chunk
|
| 237 |
if not translations:
|
| 238 |
+
print(f"⚠️ Chunk {ci}: masked OCR found no text → fallback to full OCR")
|
| 239 |
translations = extract_and_translate_chunk(chunk)
|
| 240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
print(f"β¬οΈ Chunk {ci}: OCR detections = {len(translations)}")
|
| 242 |
|
| 243 |
+
# Save original polygons for debug visualization
|
| 244 |
for t in translations:
|
| 245 |
t["original_polygon"] = t.get("polygon")
|
| 246 |
|
| 247 |
+
# ---- C) Bubble-aware matching (still in chunk-local coords) ----
|
| 248 |
+
if bubble_polygons:
|
| 249 |
+
print(f"π Chunk {ci}: matching OCR polygons to bubbles...")
|
| 250 |
+
translations = match_translations_to_bubbles(translations, bubble_polygons)
|
| 251 |
+
else:
|
| 252 |
+
print(f"⚠️ Chunk {ci}: no bubble polygons → skip bubble matching")
|
| 253 |
|
| 254 |
+
# Debug: polygons after matching
|
| 255 |
if debug:
|
| 256 |
+
dbg_path2 = visualize_all_debug(
|
| 257 |
+
chunk, translations, bubble_polygons, bubble_boxes=bubble_boxes,
|
|
|
|
| 258 |
step_name=f"chunk{ci}_after_correction", prefix="bubble_dbg"
|
| 259 |
)
|
| 260 |
+
debug_files.append(dbg_path2)
|
| 261 |
|
| 262 |
+
# ---- D) Render translated text onto THIS chunk ----
|
| 263 |
+
translated_chunk_img = chunk.copy()
|
| 264 |
|
| 265 |
for t in translations:
|
| 266 |
+
translated_text = t.get("translated", "")
|
| 267 |
+
if not translated_text:
|
| 268 |
continue
|
| 269 |
|
| 270 |
bidx = t.get("matched_bubble_idx")
|
| 271 |
|
| 272 |
+
# Prefer the refined bubble polygon when we have a match
|
| 273 |
+
if bidx is not None and 0 <= bidx < len(bubble_polygons):
|
| 274 |
render_poly = bubble_polygons[bidx]
|
| 275 |
else:
|
| 276 |
+
# Fallback: use OCR polygon directly
|
| 277 |
+
render_poly = t.get("polygon")
|
| 278 |
+
|
| 279 |
+
if not render_poly:
|
| 280 |
+
continue
|
| 281 |
|
| 282 |
translated_chunk_img = draw_translated_text_convex(
|
| 283 |
translated_chunk_img,
|
| 284 |
polygon_coords=render_poly,
|
| 285 |
+
text=translated_text,
|
| 286 |
font_path=FONT_PATH,
|
| 287 |
font_scale=1.0,
|
| 288 |
original_polygon=t.get("original_polygon"),
|
| 289 |
+
bubble_polygon=bubble_polygons[bidx] if (bidx is not None and 0 <= bidx < len(bubble_polygons)) else None,
|
| 290 |
)
|
| 291 |
|
| 292 |
+
# ---- E) Collect outputs from this chunk ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
translated_chunks.append(translated_chunk_img)
|
| 294 |
|
| 295 |
+
for t in translations:
|
| 296 |
+
all_translations.append(t)
|
| 297 |
+
all_tables.append([t.get("original", ""), t.get("translated", "")])
|
| 298 |
|
| 299 |
# -------------------------------------------------------
|
| 300 |
# 3. Assemble HTML output (original & translated)
|
|
|
|
| 302 |
orig_html = "".join([encode_image_to_html(c) for c in chunks])
|
| 303 |
trans_html = "".join([encode_image_to_html(t) for t in translated_chunks])
|
| 304 |
|
| 305 |
+
# Keep API compatible: translations wrapped in a list
|
| 306 |
return filename, orig_html, trans_html, all_tables, [all_translations], debug_files
|
| 307 |
|
| 308 |
def split_image_into_chunks(img, num_chunks):
|