# manga_translation/utils/polygon_utils.py
# qqwjq1981's picture
# Update utils/polygon_utils.py
# e63ba27 verified
"""
Enhanced polygon utilities with bubble-based correction
"""
import os
import cv2
import numpy as np
import textwrap
from shapely.geometry import Polygon, MultiPoint, Point
from PIL import Image, ImageDraw, ImageFont
# Absolute path of the NotoSansSC-Regular.ttf font file, resolved one
# directory above the directory that contains this module.
FONT_PATH = os.path.abspath(
    os.path.join(
        os.path.dirname(__file__),
        os.pardir,
        "NotoSansSC-Regular.ttf",
    )
)
# ============================ Geometry Helpers ============================
def calculate_iou(poly1, poly2):
    """
    Compute the Intersection-over-Union (IoU) of two polygons.

    Both arguments are handed to shapely's ``Polygon`` constructor. A shape
    that reports itself as invalid is replaced by its ``buffer(0)`` result
    before any area is measured.

    Returns:
        intersection area / union area, or 0.0 when the union has no area.
        Any exception is caught, reported with ``print`` and turned into 0.0.
    """
    try:
        first, second = Polygon(poly1), Polygon(poly2)
        first = first if first.is_valid else first.buffer(0)
        second = second if second.is_valid else second.buffer(0)

        shared_area = first.intersection(second).area
        combined_area = first.union(second).area
        if combined_area > 0:
            return shared_area / combined_area
        return 0.0
    except Exception as e:
        print(f"⚠️ calculate_iou failed: {e}")
        return 0.0
def sanitize_polygon(poly):
    """
    Normalise a polygon to a list of distinct integer ``(x, y)`` tuples.

    Each usable point is truncated with ``int()``; duplicate points are
    dropped while keeping first-seen order. Entries that are not a pair of
    finite real numbers are skipped instead of raising. "Real number" is
    checked against ``numbers.Real`` so that numpy scalars (and therefore
    polygons passed as numpy arrays) are accepted, not only builtin
    ``int``/``float``.

    Args:
        poly: iterable of ``(x, y)`` pairs, or None.

    Returns:
        A list of at least 4 distinct ``(int, int)`` tuples, or None when
        the input is None, not iterable, or leaves fewer than 4 distinct
        usable points.
    """
    import math
    from numbers import Real

    if poly is None:
        return None
    try:
        # Materialise first: truth-testing a numpy array is ambiguous, and
        # a non-iterable input is simply "not a polygon".
        candidates = list(poly)
    except TypeError:
        return None

    pts = []
    for point in candidates:
        try:
            x, y = point
        except (TypeError, ValueError):
            # Not a 2-element point (wrong length or not iterable).
            continue
        if not (isinstance(x, Real) and isinstance(y, Real)):
            continue
        if not (math.isfinite(x) and math.isfinite(y)):
            # int(nan) / int(inf) would raise; such points carry no position.
            continue
        pts.append((int(x), int(y)))

    # dict.fromkeys de-duplicates while preserving insertion order.
    pts = list(dict.fromkeys(pts))

    # Must have >= 4 distinct points.
    return pts if len(pts) >= 4 else None
def calculate_polygon_overlap(ocr_poly, bubble_poly):
    """
    Fraction of the OCR polygon's area that lies inside the bubble polygon.

    overlap = area(ocr ∩ bubble) / area(ocr)

    Returns 0.0 when either polygon is empty/None, when either has fewer
    than 4 points, or when the OCR polygon has no area. Any exception is
    caught, reported with ``print`` and turned into 0.0.
    """
    try:
        if not (ocr_poly and bubble_poly):
            return 0.0
        if any(len(coords) < 4 for coords in (ocr_poly, bubble_poly)):
            return 0.0

        text_region = Polygon(ocr_poly)
        bubble_region = Polygon(bubble_poly)
        # Invalid shapes are swapped for their buffer(0) result before use.
        text_region = text_region if text_region.is_valid else text_region.buffer(0)
        bubble_region = bubble_region if bubble_region.is_valid else bubble_region.buffer(0)

        shared_area = text_region.intersection(bubble_region).area
        text_area = text_region.area
        if text_area > 0:
            return shared_area / text_area
        return 0.0
    except Exception as e:
        print(f"⚠️ calculate_polygon_overlap failed: {e}")
        return 0.0
def match_polygon_to_bubble_by_overlap(ocr_poly, bubble_polygons, min_overlap=0.15):
    """
    Pick the bubble that covers the largest share of the OCR polygon.

    The score of each bubble is ``calculate_polygon_overlap(ocr_poly, bubble)``,
    i.e. area(ocr ∩ bubble) / area(ocr). On ties the earliest bubble wins.

    Returns:
        Index into ``bubble_polygons`` of the best bubble, or None when there
        are no bubbles, no bubble has a positive overlap, or the best overlap
        is below ``min_overlap``.
    """
    if not bubble_polygons:
        return None

    scores = [calculate_polygon_overlap(ocr_poly, candidate) for candidate in bubble_polygons]

    # max() returns the first maximal element, matching a strict ">" scan.
    winner = max(range(len(scores)), key=scores.__getitem__)
    top_score = scores[winner]

    # A zero overlap never counts as a match, even if min_overlap is 0.
    if top_score > 0.0 and top_score >= min_overlap:
        return winner
    return None
# ====================== Polygon Correction with Bubbles ===================
def _largest_polygon_part(geom):
    """Return the largest-area Polygon contained in *geom*, or None if it has none."""
    if geom is None or geom.is_empty:
        return None
    if geom.geom_type == "Polygon":
        return geom
    # MultiPolygon / GeometryCollection expose their members via .geoms;
    # points and lines have no polygonal part and end up as None.
    parts = [_largest_polygon_part(part) for part in getattr(geom, "geoms", ())]
    parts = [part for part in parts if part is not None]
    return max(parts, key=lambda part: part.area) if parts else None


def _exterior_as_int_points(shape):
    """Return a Polygon's exterior ring as int (x, y) tuples, closing point dropped."""
    return [(int(x), int(y)) for x, y in shape.exterior.coords[:-1]]


def correct_polygon_with_bubble(ocr_polygon, bubble_polygon, strategy="hybrid"):
    """
    Correct OCR polygon using bubble polygon.

    Strategies:
    - "bubble": Use bubble polygon directly
    - "intersect": Use intersection of OCR and bubble; falls back to the
      bubble when the intersection is empty or smaller than 30% of the
      OCR area
    - "expand": Buffer the OCR region by 10 units and clip it to the bubble;
      used only when the clipped result is a single polygon larger than
      50% of the OCR area, otherwise the bubble is returned
    - "hybrid": Choose based on bubble area / OCR area:
        > 3    -> bubble shrunk by 5 units (bubble itself if that is empty)
        < 1.5  -> same as "intersect"
        else   -> bubble
    Any other strategy value behaves like "bubble".

    Shapely operations such as ``buffer`` and ``intersection`` may return a
    multi-part geometry. Whenever that happens the largest polygonal part is
    used, instead of failing on the missing ``.exterior`` attribute and
    discarding the correction.

    Args:
        ocr_polygon: sequence of (x, y) points of the OCR region.
        bubble_polygon: sequence of (x, y) points of the bubble.
        strategy: one of the strategy names above.

    Returns:
        List of ``(int, int)`` tuples for the corrected polygon. If anything
        raises (including a bubble with no polygonal area) a warning is
        printed and ``ocr_polygon`` is returned unchanged.
    """
    try:
        ocr_shape = Polygon(ocr_polygon)
        bubble_shape = Polygon(bubble_polygon)
        if not ocr_shape.is_valid:
            ocr_shape = ocr_shape.buffer(0)
        if not bubble_shape.is_valid:
            bubble_shape = bubble_shape.buffer(0)

        # buffer(0) on a self-intersecting ring can yield several pieces;
        # the outline we hand back is always the largest one.
        bubble_outline = _largest_polygon_part(bubble_shape)
        if bubble_outline is None:
            raise ValueError("bubble polygon has no polygonal area")
        bubble_points = _exterior_as_int_points(bubble_outline)

        # ---- Strategy: hybrid (resolves to shrink / intersect / bubble) ----
        if strategy == "hybrid":
            size_ratio = (
                bubble_shape.area / ocr_shape.area if ocr_shape.area > 0 else 999
            )
            if size_ratio > 3:
                # Bubble is much larger than OCR region: likely multi-line
                # speech. Shrinking may split a bubble with a narrow neck.
                shrunk = _largest_polygon_part(bubble_shape.buffer(-5))
                if shrunk is None:
                    return bubble_points
                return _exterior_as_int_points(shrunk)
            if size_ratio < 1.5:
                # Similar sizes: use intersection (handled just below).
                strategy = "intersect"
            else:
                # Moderate difference -> bubble is still safer.
                return bubble_points

        # ---- Strategy: intersection region ----
        if strategy == "intersect":
            inter = ocr_shape.intersection(bubble_shape)
            if inter.is_empty or inter.area < ocr_shape.area * 0.3:
                # Intersection too small, bubble is safer.
                return bubble_points
            best_part = _largest_polygon_part(inter)
            if best_part is None:
                return bubble_points
            return _exterior_as_int_points(best_part)

        # ---- Strategy: expand OCR slightly toward bubble ----
        if strategy == "expand":
            clipped = ocr_shape.buffer(10).intersection(bubble_shape)
            if (
                not clipped.is_empty
                and clipped.area > ocr_shape.area * 0.5
                and clipped.geom_type == "Polygon"
            ):
                return _exterior_as_int_points(clipped)
            return bubble_points

        # ---- Strategy: "bubble", and fallback for unknown names ----
        return bubble_points
    except Exception as e:
        print(f"⚠️ Polygon correction failed: {e}, using original OCR polygon")
        return ocr_polygon
def correct_ocr_polygons_with_bubbles(translations, bubble_polygons, strategy="hybrid"):
    """
    Align every OCR polygon in ``translations`` with its best-matching bubble.

    Each entry is a dict whose polygon is read from "polygon" (or "polygons"
    when "polygon" is missing/empty). Entries without a polygon are passed
    through as the same object. All other entries are shallow-copied and get:
    - "original_polygon": the polygon as it was read
    - "polygon": the corrected polygon, or the original when no bubble matched
    - "matched_bubble_idx": index of the matched bubble, or None

    A summary line is printed when at least one region had no match.

    Returns:
        New list with one item per input entry, in the same order.
    """
    corrected = []
    unmatched = 0

    for entry in translations:
        source_poly = entry.get("polygon") or entry.get("polygons")
        if not source_poly:
            corrected.append(entry)
            continue

        bubble_idx = match_polygon_to_bubble_by_overlap(source_poly, bubble_polygons)
        updated = entry.copy()
        updated["original_polygon"] = source_poly

        if bubble_idx is None:
            # No match -> keep original OCR polygon
            updated["matched_bubble_idx"] = None
            updated["polygon"] = source_poly
            unmatched += 1
        else:
            updated["polygon"] = correct_polygon_with_bubble(
                source_poly, bubble_polygons[bubble_idx], strategy
            )
            updated["matched_bubble_idx"] = bubble_idx

        corrected.append(updated)

    if unmatched:
        print(f"ℹ️ {unmatched}/{len(translations)} OCR regions had no matching bubble")
    return corrected
# ========================= Basic Polygon Utilities =======================
def shrink_or_expand_polygon(polygon, shrink_ratio=0.9):
    """
    Scale a polygon about the arithmetic mean of its vertices.

    shrink_ratio < 1 pulls every vertex toward that centre (shrink);
    shrink_ratio > 1 pushes it away (expand).

    Returns:
        List of ``(int, int)`` tuples, each coordinate truncated with
        ``int()``. An empty/None polygon is returned unchanged.
    """
    if not polygon:
        return polygon

    vertex_count = len(polygon)
    center_x = sum(px for px, _ in polygon) / vertex_count
    center_y = sum(py for _, py in polygon) / vertex_count

    scaled = []
    for px, py in polygon:
        moved_x = (px - center_x) * shrink_ratio + center_x
        moved_y = (py - center_y) * shrink_ratio + center_y
        scaled.append((int(moved_x), int(moved_y)))
    return scaled
def inpaint_polygon(img: Image.Image, polygon, mode="auto", fallback_color=(255, 255, 255)):
    """
    Return a copy of ``img`` with ``polygon`` filled in a solid colour.

    This is a plain flat fill, not a content-aware inpaint. For manga
    bubbles a simple fill is sufficient; ``cv2.inpaint`` would be the
    upgrade path if a smarter fill is ever needed.

    Args:
        img: source PIL image; it is copied, not modified.
        polygon: polygon coordinates in a form ``ImageDraw.polygon`` accepts.
        mode: kept for interface compatibility; currently ignored.
        fallback_color: fill colour passed to ``ImageDraw.polygon``.

    Returns:
        The filled copy of ``img``.
    """
    # The previous version also built an RGB array and a cv2 mask here but
    # never used either; that per-call full-image work has been dropped.
    img_copy = img.copy()
    ImageDraw.Draw(img_copy).polygon(polygon, fill=fallback_color)
    return img_copy
def merge_polygons_to_convex_hull(polygons):
    """
    Merge several polygons into the convex hull of all their points.

    Args:
        polygons: iterable of polygons, each an iterable of (x, y) points.

    Returns:
        List of ``(int, int)`` tuples describing the hull (closing point
        dropped), or ``[]`` when there are no points at all. When the points
        do not span an area (a single point, two points, or collinear
        points) the hull degenerates to a point or a segment, and its one or
        two coordinates are returned instead of raising.
    """
    points = [pt for poly in polygons for pt in poly]
    if not points:
        return []

    hull = MultiPoint(points).convex_hull
    if hull.geom_type == "Polygon":
        ring = hull.exterior.coords[:-1]
    else:
        # Point / LineString hulls have no exterior ring, only plain coords.
        ring = hull.coords
    return [(int(x), int(y)) for x, y in ring]
# ======================== Rendering / Text Drawing =======================
def render_translated_chunk(img: Image.Image, translations, font_path=None, font_scale=1.0):
    """
    Draw every translated entry onto a copy of ``img``.

    Each entry is a dict; its polygon comes from "polygon" (or "polygons"
    as a fallback) and its text from "translated". Entries missing either
    one are skipped. ``FONT_PATH`` is used when ``font_path`` is falsy.

    Returns:
        The rendered image; the ``img`` argument itself is left untouched
        because all drawing happens on a copy.
    """
    canvas = img.copy()
    for entry in translations:
        region = entry.get("polygon") or entry.get("polygons")
        caption = entry.get("translated", "")
        if not (region and caption):
            continue
        canvas = draw_translated_text_convex(
            canvas,
            region,
            caption,
            font_path=font_path or FONT_PATH,
            font_scale=font_scale,
        )
    return canvas
def draw_translated_text_convex(
    img,
    polygon_coords,
    text,
    font_path=None,
    font_scale=1.0,
    original_polygon=None,  # OCR polygon (optional debug outline)
    bubble_polygon=None,  # matched bubble polygon (optional debug outline)
):
    """
    Outline the debug polygons, blank the render area and draw the text.

    Outlines (3 px wide, drawn directly onto ``img``):
    - RED: ``original_polygon``, if given
    - BLUE: ``bubble_polygon``, if given
    - GREEN: the render polygon, i.e. ``polygon_coords`` scaled to 0.9

    The render polygon is then filled white via ``inpaint_polygon`` and the
    wrapped text is drawn on the result, with the font sized from the
    unscaled ``polygon_coords``.

    NOTE(review): the white fill runs after the outlines are drawn, so it
    presumably paints over whatever part of them lies inside the render
    polygon - confirm that this is intended.

    Returns:
        The image returned by ``inpaint_polygon`` with the text drawn on it.
    """
    font_path = FONT_PATH if font_path is None else font_path
    overlay = ImageDraw.Draw(img, "RGBA")

    def _outline(points, rgba):
        # Repeat the first vertex so the line closes the ring.
        overlay.line(points + [points[0]], fill=rgba, width=3)

    # 1. Original OCR polygon (RED)
    if original_polygon:
        _outline(original_polygon, (255, 50, 50, 200))

    # 2. Bubble polygon (BLUE)
    if bubble_polygon:
        _outline(bubble_polygon, (50, 150, 255, 200))

    # 3. Final render polygon (GREEN)
    render_polygon = shrink_or_expand_polygon(polygon_coords, shrink_ratio=0.9)
    _outline(render_polygon, (50, 255, 100, 220))

    # 4. Blank the area inside the render polygon
    img = inpaint_polygon(img, render_polygon, mode="auto", fallback_color=(255, 255, 255))

    # 5. Wrapped translated text
    draw_wrapped_text(
        img,
        render_polygon,
        text,
        font_path,
        polygon_for_size=polygon_coords,
        font_scale=font_scale,
    )
    return img
def draw_wrapped_text(img, polygon, text, font_path, polygon_for_size=None, font_scale=1.0):
    """
    Draw wrapped text centered in the polygon bounding box.

    The bounding box is taken from ``polygon_for_size`` when it is given and
    non-empty, otherwise from ``polygon``. The font size is estimated from
    the box and the text length (never below 6 before scaling), multiplied
    by ``font_scale`` and clamped to at least 1. The text is wrapped with
    ``textwrap.fill`` using the rendered width of "A" as the per-character
    width, then drawn in black directly onto ``img``.

    Args:
        img: PIL image to draw on (modified in place).
        polygon: sequence of (x, y) points; only used for the bounding box
            when ``polygon_for_size`` is falsy.
        text: text to draw.
        font_path: path of a TrueType font file.
        polygon_for_size: optional sequence of (x, y) points for the box.
        font_scale: multiplier applied to the estimated font size.

    Returns:
        None. Nothing is drawn when the text is empty, there are no points,
        or the bounding box has no width or height.
    """
    polygon_for_size = polygon_for_size or polygon
    if not text or not polygon_for_size:
        return

    xs, ys = zip(*polygon_for_size)
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    box_width = x_max - x_min
    box_height = y_max - y_min
    if box_width <= 0 or box_height <= 0:
        return

    draw = ImageDraw.Draw(img)

    avg_char_width = 0.4
    estimated_size = int(min(box_height / 1.2, box_width / (len(text) * avg_char_width + 1)))
    estimated_size = max(6, estimated_size)
    # A small font_scale could round the size down to 0, which truetype rejects.
    font_size = max(1, int(estimated_size * font_scale))
    font = ImageFont.truetype(font_path, font_size)

    max_chars = max(1, int(box_width / (font.getbbox("A")[2] + 1)))
    wrapped = textwrap.fill(text, width=max_chars)

    left, top, right, bottom = draw.textbbox((0, 0), wrapped, font=font, align="center")
    text_w, text_h = right - left, bottom - top
    # textbbox is relative to the drawing origin and its left/top are usually
    # non-zero; subtract them so the text itself is centred, not its origin.
    x = x_min + (box_width - text_w) / 2 - left
    y = y_min + (box_height - text_h) / 2 - top
    draw.text((x, y), wrapped, font=font, fill="black", align="center")