Spaces:
Running
Running
Update utils/bubble_utils.py
Browse files- utils/bubble_utils.py +1 -54
utils/bubble_utils.py
CHANGED
|
@@ -5,7 +5,7 @@ import numpy as np
|
|
| 5 |
from PIL import Image, ImageDraw
|
| 6 |
|
| 7 |
from utils.image_utils import load_and_split_image, encode_image_to_html
|
| 8 |
-
from utils.ocr_utils import extract_and_translate_chunk
|
| 9 |
from utils.polygon_utils import (
|
| 10 |
draw_translated_text_convex,
|
| 11 |
shrink_or_expand_polygon,
|
|
@@ -17,59 +17,6 @@ from utils.bubble_detect import detect_speech_bubbles_robust
|
|
| 17 |
from utils.u2net_detector import detect_bubbles_u2net
|
| 18 |
from utils.bubble_detect_rtdetr import detect_and_refine_bubbles, polygon_to_mask
|
| 19 |
|
| 20 |
-
def extract_and_translate_with_masks(full_img, interior_polygons):
    """
    Run OCR only inside bubble interior polygons and translate the text found.

    For every non-empty polygon the image is masked so that pixels outside the
    polygon become white (255), OCR is run on that masked copy, and every
    non-blank recognised text line is translated.

    Args:
        full_img: Image convertible with ``np.array``; its first two
            dimensions are read as (height, width).
        interior_polygons: Iterable of bubble interior polygons. Falsy
            entries (``None``, empty list) are skipped, but still consume an
            index so ``matched_bubble_idx`` lines up with the input order.

    Returns:
        A list of dicts, one per recognised text line, with the keys
        ``"original"``, ``"translated"``, ``"polygon"`` (list of ``(x, y)``
        int tuples) and ``"matched_bubble_idx"`` (index of the source polygon
        in ``interior_polygons``).
    """
    # Function-scope import keeps this block self-contained.
    import logging

    # NOTE(review): `ocr_model` and `translate_text_azure` are not among the
    # imports visible near this function -- confirm they are in scope.
    np_img = np.array(full_img)
    H, W = np_img.shape[:2]

    translations = []

    for idx, poly in enumerate(interior_polygons):
        if not poly:
            continue

        # --- Create mask for this bubble ---
        # presumably an (H, W) array that is 255 inside the polygon -- verify
        # against polygon_to_mask.
        mask = polygon_to_mask((W, H), poly)

        # Keep original pixels where the mask is 255, white everywhere else.
        bubble_img = np.where(mask[..., None] == 255, np_img, 255).astype(np.uint8)

        # OCR on masked image
        results = ocr_model.ocr(bubble_img)

        # Only the first result is used, and only when it is a dict.
        if not results or not isinstance(results[0], dict):
            continue

        res = results[0]
        polys = res.get("rec_polys", [])
        texts = res.get("rec_texts", [])

        for poly_coords, text in zip(polys, texts):
            merged_text = text.strip()
            if not merged_text:
                continue

            # OCR ran on a full-size masked image, so no offset is applied
            # here; the coordinates are only cast to plain int tuples.
            poly_global = [(int(x), int(y)) for x, y in poly_coords]

            # Translate text. Best effort: on failure fall back to the
            # untranslated text, but do not trap KeyboardInterrupt/SystemExit
            # and do not hide the failure.
            try:
                translated = translate_text_azure(merged_text)
            except Exception:
                logging.getLogger(__name__).warning(
                    "Translation failed for bubble %d; keeping original text",
                    idx,
                    exc_info=True,
                )
                translated = merged_text

            translations.append({
                "original": merged_text,
                "translated": translated,
                "polygon": poly_global,
                "matched_bubble_idx": idx,
            })

    return translations
|
| 72 |
-
|
| 73 |
def match_translations_to_bubbles(translations, bubble_polygons, min_overlap=0.10):
|
| 74 |
"""
|
| 75 |
Add matched_bubble_idx to each OCR translation.
|
|
|
|
| 5 |
from PIL import Image, ImageDraw
|
| 6 |
|
| 7 |
from utils.image_utils import load_and_split_image, encode_image_to_html
|
| 8 |
+
from utils.ocr_utils import extract_and_translate_chunk, extract_and_translate_with_masks
|
| 9 |
from utils.polygon_utils import (
|
| 10 |
draw_translated_text_convex,
|
| 11 |
shrink_or_expand_polygon,
|
|
|
|
| 17 |
from utils.u2net_detector import detect_bubbles_u2net
|
| 18 |
from utils.bubble_detect_rtdetr import detect_and_refine_bubbles, polygon_to_mask
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def match_translations_to_bubbles(translations, bubble_polygons, min_overlap=0.10):
|
| 21 |
"""
|
| 22 |
Add matched_bubble_idx to each OCR translation.
|