Spaces:
Sleeping
Sleeping
| """ | |
| Enhanced polygon utilities with bubble-based correction | |
| """ | |
| import os | |
| import cv2 | |
| import numpy as np | |
| import textwrap | |
| from shapely.geometry import Polygon, MultiPoint, Point | |
| from PIL import Image, ImageDraw, ImageFont | |
# Absolute path to the Noto Sans SC font file, looked up one directory
# above the directory that contains this module.
FONT_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, "NotoSansSC-Regular.ttf")
)
| # ============================ Geometry Helpers ============================ | |
def calculate_iou(poly1, poly2):
    """Return the Intersection-over-Union (IoU) of two polygons.

    Each argument is a point sequence handed to ``Polygon``. A shape that
    is reported as invalid is repaired with ``buffer(0)`` before the areas
    are measured.

    Returns:
        ``intersection_area / union_area``, or ``0.0`` when the union has
        no area. Any exception is printed and also yields ``0.0``.
    """
    try:
        first, second = Polygon(poly1), Polygon(poly2)
        first = first if first.is_valid else first.buffer(0)
        second = second if second.is_valid else second.buffer(0)

        shared_area = first.intersection(second).area
        combined_area = first.union(second).area
        if combined_area > 0:
            return shared_area / combined_area
        return 0.0
    except Exception as e:
        print(f"⚠️ calculate_iou failed: {e}")
        return 0.0
def sanitize_polygon(poly):
    """Normalise a polygon to a list of distinct integer ``(x, y)`` tuples.

    Accepts any iterable of two-item points, including NumPy arrays and
    NumPy scalar coordinates. Points that are not a pair of real numbers,
    or whose coordinates are NaN/infinite, are skipped. Duplicate points
    are removed while keeping first-seen order.

    Returns:
        The cleaned point list, or ``None`` when the input is missing, not
        iterable, or has fewer than 4 distinct points left.
    """
    import numbers

    if poly is None:
        return None
    try:
        # list() instead of a truthiness test: ``not array`` raises for
        # multi-element NumPy arrays.
        raw_points = list(poly)
    except TypeError:
        return None

    pts = []
    for point in raw_points:
        try:
            x, y = point
        except (TypeError, ValueError):
            continue  # not a two-item point
        # numbers.Real also covers NumPy integer/float scalars, which are
        # not instances of the built-in ``int``.
        if not (isinstance(x, numbers.Real) and isinstance(y, numbers.Real)):
            continue
        try:
            pts.append((int(x), int(y)))
        except (ValueError, OverflowError):
            continue  # NaN or infinity cannot be converted to int

    # Remove duplicates while preserving order
    pts = list(dict.fromkeys(pts))

    # Must have >= 4 points
    if len(pts) < 4:
        return None
    return pts
def calculate_polygon_overlap(ocr_poly, bubble_poly):
    """Return the share of the OCR polygon's area that lies inside the bubble.

    overlap = area(ocr ∩ bubble) / area(ocr)

    Args:
        ocr_poly: Point sequence of the OCR region (list or NumPy array).
        bubble_poly: Point sequence of the candidate bubble.

    Returns:
        A float ratio; ``0.0`` when either polygon is missing, has fewer
        than 4 points, the OCR polygon has no area, or any exception
        occurs (the exception is printed).
    """
    try:
        # Explicit None/len checks rather than truthiness: truth-testing a
        # multi-element NumPy array raises ValueError, which the handler
        # below would silently turn into "no overlap".
        if ocr_poly is None or bubble_poly is None:
            return 0.0
        if len(ocr_poly) < 4 or len(bubble_poly) < 4:
            return 0.0

        ocr_shape = Polygon(ocr_poly)
        bubble_shape = Polygon(bubble_poly)
        # buffer(0) repairs invalid (e.g. self-intersecting) shapes
        if not ocr_shape.is_valid:
            ocr_shape = ocr_shape.buffer(0)
        if not bubble_shape.is_valid:
            bubble_shape = bubble_shape.buffer(0)

        inter = ocr_shape.intersection(bubble_shape).area
        ocr_area = ocr_shape.area
        return inter / ocr_area if ocr_area > 0 else 0.0
    except Exception as e:
        print(f"⚠️ calculate_polygon_overlap failed: {e}")
        return 0.0
def match_polygon_to_bubble_by_overlap(ocr_poly, bubble_polygons, min_overlap=0.15):
    """Pick the bubble that covers the largest share of the OCR polygon.

    The score for each bubble is ``area(ocr ∩ bubble) / area(ocr)`` as
    computed by ``calculate_polygon_overlap``. The first bubble with the
    strictly highest positive score wins.

    Returns:
        The index of the winning bubble, or ``None`` when there are no
        bubbles, no bubble overlaps at all, or the best score is below
        ``min_overlap``.
    """
    if not bubble_polygons:
        return None

    scores = [
        calculate_polygon_overlap(ocr_poly, candidate)
        for candidate in bubble_polygons
    ]

    winner, top_score = None, 0.0
    for position, score in enumerate(scores):
        if score > top_score:
            winner, top_score = position, score

    if winner is not None and top_score >= min_overlap:
        return winner
    return None
| # ====================== Polygon Correction with Bubbles =================== | |
def _iter_polygon_parts(geom):
    """Yield every non-empty ``Polygon`` inside *geom*, recursing into collections."""
    if geom is None or geom.is_empty:
        return
    if geom.geom_type == "Polygon":
        yield geom
    elif hasattr(geom, "geoms"):
        for part in geom.geoms:
            yield from _iter_polygon_parts(part)


def _largest_exterior_coords(geom):
    """Return the exterior ring of the largest polygon in *geom* as int tuples, or ``[]``."""
    parts = list(_iter_polygon_parts(geom))
    if not parts:
        return []
    largest = max(parts, key=lambda part: part.area)
    # The last ring coordinate repeats the first one, so it is dropped.
    return [(int(x), int(y)) for x, y in largest.exterior.coords[:-1]]


def correct_polygon_with_bubble(ocr_polygon, bubble_polygon, strategy="hybrid"):
    """Correct an OCR polygon using its matched bubble polygon.

    Strategies:
        - "bubble": use the bubble polygon directly.
        - "intersect": use the intersection of OCR and bubble; if it is
          empty or smaller than 30% of the OCR area, use the bubble.
        - "expand": grow the OCR region by 10 units, clip it to the bubble
          and use the result when it is a single polygon larger than 50%
          of the OCR area; otherwise use the bubble.
        - "hybrid": choose by ``bubble_area / ocr_area``. Above 3 the
          bubble shrunk by 5 units is used, below 1.5 the "intersect"
          strategy is used, otherwise the bubble itself.
        Any other strategy value falls back to the bubble polygon.

    Geometry operations may return multi-part results (for example a
    negative buffer can split a bubble in two). In that case the largest
    polygon part is used rather than failing.

    Returns:
        A list of integer ``(x, y)`` tuples. If a strategy yields no
        polygon, the bubble outline is used; if that is empty too, or any
        exception occurs (printed), ``ocr_polygon`` is returned unchanged.
    """
    try:
        ocr_shape = Polygon(ocr_polygon)
        bubble_shape = Polygon(bubble_polygon)
        # buffer(0) repairs invalid shapes; the result may be multi-part
        if not ocr_shape.is_valid:
            ocr_shape = ocr_shape.buffer(0)
        if not bubble_shape.is_valid:
            bubble_shape = bubble_shape.buffer(0)

        bubble_coords = _largest_exterior_coords(bubble_shape)
        candidate = []

        # ---- Strategy: hybrid (may delegate to "intersect") ----
        if strategy == "hybrid":
            size_ratio = (
                bubble_shape.area / ocr_shape.area if ocr_shape.area > 0 else 999
            )
            if size_ratio > 3:
                # Bubble is much larger than OCR region: likely multi-line speech
                candidate = _largest_exterior_coords(bubble_shape.buffer(-5))
            elif size_ratio < 1.5:
                # Similar sizes: use intersection
                strategy = "intersect"
            # Moderate difference: bubble is still safer (candidate stays empty)

        # ---- Strategy: intersection region ----
        if strategy == "intersect":
            inter = ocr_shape.intersection(bubble_shape)
            # Intersection too small -> leave candidate empty, bubble is safer
            if not inter.is_empty and inter.area >= ocr_shape.area * 0.3:
                candidate = _largest_exterior_coords(inter)

        # ---- Strategy: expand OCR slightly toward bubble ----
        elif strategy == "expand":
            expanded = ocr_shape.buffer(10)  # ~10px expansion
            clipped = expanded.intersection(bubble_shape)
            if (
                not clipped.is_empty
                and clipped.area > ocr_shape.area * 0.5
                and clipped.geom_type == "Polygon"
            ):
                candidate = _largest_exterior_coords(clipped)

        # "bubble", unknown strategies and empty candidates end up here
        return candidate or bubble_coords or ocr_polygon
    except Exception as e:
        print(f"⚠️ Polygon correction failed: {e}, using original OCR polygon")
        return ocr_polygon
def correct_ocr_polygons_with_bubbles(translations, bubble_polygons, strategy="hybrid"):
    """Correct every OCR polygon in *translations* using detected bubbles.

    Entries without a "polygon"/"polygons" value are passed through as the
    same object. Every other entry is shallow-copied and receives:
        - "original_polygon": the polygon it came in with
        - "matched_bubble_idx": index of the matched bubble, or None
        - "polygon": the corrected polygon, or the original one when no
          bubble matched

    A summary line is printed when at least one entry had no match.

    Returns:
        A new list with the updated entries, in input order.
    """
    results = []
    missing = 0

    for entry in translations:
        source_poly = entry.get("polygon") or entry.get("polygons")
        if not source_poly:
            results.append(entry)
            continue

        bubble_idx = match_polygon_to_bubble_by_overlap(source_poly, bubble_polygons)

        updated = entry.copy()
        updated["original_polygon"] = source_poly

        if bubble_idx is None:
            # No match: keep the original OCR polygon
            updated.update(matched_bubble_idx=None, polygon=source_poly)
            missing += 1
        else:
            fixed_poly = correct_polygon_with_bubble(
                source_poly, bubble_polygons[bubble_idx], strategy
            )
            updated.update(polygon=fixed_poly, matched_bubble_idx=bubble_idx)

        results.append(updated)

    if missing:
        print(f"ℹ️ {missing}/{len(translations)} OCR regions had no matching bubble")
    return results
| # ========================= Basic Polygon Utilities ======================= | |
def shrink_or_expand_polygon(polygon, shrink_ratio=0.9):
    """Scale a polygon about the mean of its vertices.

    ``shrink_ratio`` below 1 pulls the vertices toward the centre, above 1
    pushes them away. A falsy ``polygon`` is returned unchanged.

    Returns:
        A list of ``(x, y)`` tuples whose coordinates were truncated to
        ``int``.
    """
    if not polygon:
        return polygon

    vertex_count = len(polygon)
    center_x = sum(px for px, _ in polygon) / vertex_count
    center_y = sum(py for _, py in polygon) / vertex_count

    resized = []
    for px, py in polygon:
        scaled_x = (px - center_x) * shrink_ratio + center_x
        scaled_y = (py - center_y) * shrink_ratio + center_y
        resized.append((int(scaled_x), int(scaled_y)))
    return resized
def inpaint_polygon(img: Image.Image, polygon, mode="auto", fallback_color=(255, 255, 255)):
    """Return a copy of *img* with *polygon* filled by a solid colour.

    Args:
        img: Source image; it is not modified.
        polygon: Point sequence passed to ``ImageDraw.polygon``.
        mode: Accepted for interface compatibility; this implementation
            does not use it and always performs a flat fill.
        fallback_color: Fill colour for the polygon interior.

    Returns:
        A new image with the polygon area painted over.
    """
    # The previous version also converted the whole image to a NumPy array
    # and rasterised a cv2 mask that nothing read; that work was dropped.
    # Could use cv2.inpaint for fancy filling; for manga bubbles simple fill is OK
    img_copy = img.copy()
    ImageDraw.Draw(img_copy).polygon(polygon, fill=fallback_color)
    return img_copy
def merge_polygons_to_convex_hull(polygons):
    """Merge several polygons into the convex hull of all their points.

    Args:
        polygons: Iterable of point sequences.

    Returns:
        The hull vertices as integer ``(x, y)`` tuples, or ``[]`` when no
        points are supplied. When the points do not span an area (a single
        point, two points, or collinear points) the hull is a point or a
        line, and its coordinates are returned as they are.
    """
    points = [pt for poly in polygons for pt in poly]
    if not points:
        return []

    hull = MultiPoint(points).convex_hull
    if hull.geom_type == "Polygon":
        # The last ring coordinate repeats the first one, so it is dropped.
        outline = hull.exterior.coords[:-1]
    else:
        # Degenerate hull (Point / LineString) has no exterior ring.
        outline = hull.coords
    return [(int(x), int(y)) for x, y in outline]
| # ======================== Rendering / Text Drawing ======================= | |
def render_translated_chunk(img: Image.Image, translations, font_path=None, font_scale=1.0):
    """Render translated text for every entry onto a copy of *img*.

    Each entry is a mapping read for "polygon" (or "polygons") and
    "translated". Entries missing either value are skipped. When
    ``font_path`` is falsy the module-level ``FONT_PATH`` is used.

    Returns:
        The rendered image; *img* itself is left untouched because all
        drawing happens on a copy.
    """
    canvas = img.copy()
    chosen_font = font_path or FONT_PATH

    for item in translations:
        region = item.get("polygon") or item.get("polygons")
        caption = item.get("translated", "")
        if not (region and caption):
            continue
        canvas = draw_translated_text_convex(
            canvas,
            region,
            caption,
            font_path=chosen_font,
            font_scale=font_scale,
        )
    return canvas
def draw_translated_text_convex(
    img,
    polygon_coords,
    text,
    font_path=None,
    font_scale=1.0,
    original_polygon=None,  # New: OCR polygon
    bubble_polygon=None  # New: detected bubble polygon
):
    """Outline the debug polygons, blank the text area and draw *text*.

    Steps, in order:
        1. RED outline of ``original_polygon`` (OCR region), if given.
        2. BLUE outline of ``bubble_polygon`` (matched bubble), if given.
        3. GREEN outline of the render polygon, which is
           ``polygon_coords`` scaled to 0.9 about its centre.
        4. The render polygon is filled via ``inpaint_polygon``.
        5. The wrapped text is drawn; its size is derived from
           ``polygon_coords``.

    The outlines are drawn directly onto *img*. The returned image is the
    one produced by ``inpaint_polygon``.
    """
    font_path = FONT_PATH if font_path is None else font_path
    overlay = ImageDraw.Draw(img, "RGBA")

    # Optional debug outlines: (points, RGBA colour), drawn in this order
    optional_outlines = (
        (original_polygon, (255, 50, 50, 200)),  # RED: original OCR polygon
        (bubble_polygon, (50, 150, 255, 200)),  # BLUE: matched bubble polygon
    )
    for outline, rgba in optional_outlines:
        if outline:
            # Append the first point again so the outline is closed
            overlay.line(outline + [outline[0]], fill=rgba, width=3)

    # Final render polygon, slightly shrunk, outlined in GREEN
    render_polygon = shrink_or_expand_polygon(polygon_coords, shrink_ratio=0.9)
    overlay.line(
        render_polygon + [render_polygon[0]],
        fill=(50, 255, 100, 220),
        width=3,
    )

    # Blank the inside of the render polygon
    filled = inpaint_polygon(
        img, render_polygon, mode="auto", fallback_color=(255, 255, 255)
    )

    # Draw the wrapped translated text on the filled image
    draw_wrapped_text(
        filled,
        render_polygon,
        text,
        font_path,
        polygon_for_size=polygon_coords,
        font_scale=font_scale,
    )
    return filled
def draw_wrapped_text(img, polygon, text, font_path, polygon_for_size=None, font_scale=1.0):
    """Draw wrapped text centered in the polygon bounding box.

    Args:
        img: Image to draw on (modified in place).
        polygon: Fallback polygon used when ``polygon_for_size`` is falsy.
        text: Text to wrap and draw in black.
        font_path: Path of the TrueType font to load.
        polygon_for_size: Polygon whose bounding box defines the text area.
        font_scale: Multiplier applied to the estimated font size.

    Returns:
        None. Nothing is drawn when no polygon is available or its
        bounding box has no width or height.
    """
    polygon_for_size = polygon_for_size or polygon
    if not polygon_for_size:
        return  # nothing to measure; zip(*[]) below would fail to unpack

    xs, ys = zip(*polygon_for_size)
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    box_width = x_max - x_min
    box_height = y_max - y_min
    if box_width <= 0 or box_height <= 0:
        return

    # Heuristic font size from the box height and the text length
    avg_char_width = 0.4
    estimated_size = int(min(box_height / 1.2, box_width / (len(text) * avg_char_width + 1)))
    estimated_size = max(6, estimated_size)
    # Clamp to 1 so a tiny font_scale cannot request a zero-sized font
    font_size = max(1, int(estimated_size * font_scale))
    font = ImageFont.truetype(font_path, font_size)

    draw = ImageDraw.Draw(img)
    # Wrap width in characters, based on the width of "A" in this font
    max_chars = max(1, int(box_width / (font.getbbox("A")[2] + 1)))
    wrapped = textwrap.fill(text, width=max_chars)

    left, top, right, bottom = draw.textbbox((0, 0), wrapped, font=font, align="center")
    text_w, text_h = right - left, bottom - top
    # textbbox reports the inked area relative to the drawing origin, so
    # its left/top offsets are subtracted to centre the ink itself.
    x = x_min + (box_width - text_w) / 2 - left
    y = y_min + (box_height - text_h) / 2 - top
    draw.text((x, y), wrapped, font=font, fill="black", align="center")