"""
Enhanced polygon utilities with bubble-based correction
"""

import os
import cv2  # noqa: F401  (not used by the code below; kept as a file-level import)
import numpy as np
import textwrap
from shapely.geometry import Polygon, MultiPoint, Point  # noqa: F401
from PIL import Image, ImageDraw, ImageFont

FONT_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "NotoSansSC-Regular.ttf")
)

# Coordinate types accepted by sanitize_polygon. NumPy scalars are listed
# explicitly because np.int32/np.int64/np.float32 are not subclasses of the
# built-in int/float.
_NUMERIC_TYPES = (int, float, np.integer, np.floating)


# ============================ Geometry Helpers ============================

def _as_valid_shape(points):
    """Build a shapely Polygon from ``points``, repairing it with ``buffer(0)`` if invalid."""
    shape = Polygon(points)
    if not shape.is_valid:
        shape = shape.buffer(0)
    return shape


def _largest_polygon(geom):
    """Return the largest non-empty Polygon contained in ``geom``, or None."""
    if geom.geom_type == "Polygon":
        return None if geom.is_empty else geom
    parts = [
        part
        for part in getattr(geom, "geoms", [])
        if part.geom_type == "Polygon" and not part.is_empty
    ]
    return max(parts, key=lambda part: part.area) if parts else None


def _exterior_points(geom):
    """
    Return the exterior ring of ``geom`` as a list of int ``(x, y)`` tuples.

    Multi-part geometries are reduced to their largest polygon part.
    Raises ValueError when ``geom`` contains no polygon at all.
    """
    poly = _largest_polygon(geom)
    if poly is None:
        raise ValueError(f"geometry of type {geom.geom_type} has no polygon part")
    # The last coordinate of a ring repeats the first one; drop it.
    return [(int(x), int(y)) for x, y in poly.exterior.coords[:-1]]


def _as_point_tuples(polygon):
    """
    Normalize ``polygon`` to a list of ``(x, y)`` tuples for PIL drawing calls.

    Accepts nested pairs (tuples or lists), a flat ``[x, y, x, y, ...]``
    sequence, or a NumPy array.
    """
    pairs = np.asarray(polygon, dtype=float).reshape(-1, 2).tolist()
    return [tuple(pair) for pair in pairs]


def calculate_iou(poly1, poly2):
    """
    Calculate Intersection over Union between two polygons.

    Invalid polygons are repaired with ``buffer(0)``. Any failure is printed
    and reported as an IoU of 0.0.
    """
    try:
        p1 = _as_valid_shape(poly1)
        p2 = _as_valid_shape(poly2)

        intersection = p1.intersection(p2).area
        union = p1.union(p2).area
        return intersection / union if union > 0 else 0.0
    except Exception as e:
        print(f"⚠️ calculate_iou failed: {e}")
        return 0.0


def sanitize_polygon(poly):
    """
    Ensures polygon has at least 4 distinct points.

    Points are cast to int tuples and de-duplicated in order. Points that
    are not 2-element pairs of numbers (built-in or NumPy scalars) are
    skipped. Returns None if fewer than 4 distinct points remain.
    """
    if not poly:
        return None

    pts = []
    for point in poly:
        try:
            x, y = point
        except (TypeError, ValueError):
            # Not an (x, y) pair; skip it instead of failing the whole polygon.
            continue
        if isinstance(x, _NUMERIC_TYPES) and isinstance(y, _NUMERIC_TYPES):
            pts.append((int(x), int(y)))

    # Remove duplicates while preserving the original point order
    pts = list(dict.fromkeys(pts))

    # Must have ≥ 4 points
    if len(pts) < 4:
        return None

    return pts


def calculate_polygon_overlap(ocr_poly, bubble_poly):
    """
    Return the fraction of the OCR polygon's area covered by the bubble.

    overlap = area(ocr ∩ bubble) / area(ocr)

    Returns 0.0 when either polygon is missing, has fewer than 4 points,
    has zero area, or when the computation fails.
    """
    try:
        if not ocr_poly or not bubble_poly:
            return 0.0
        if len(ocr_poly) < 4 or len(bubble_poly) < 4:
            return 0.0

        ocr_shape = _as_valid_shape(ocr_poly)
        bubble_shape = _as_valid_shape(bubble_poly)

        inter = ocr_shape.intersection(bubble_shape).area
        ocr_area = ocr_shape.area
        return inter / ocr_area if ocr_area > 0 else 0.0
    except Exception as e:
        print(f"⚠️ calculate_polygon_overlap failed: {e}")
        return 0.0


def match_polygon_to_bubble_by_overlap(ocr_poly, bubble_polygons, min_overlap=0.15):
    """
    Return index of bubble with the highest overlap ratio with OCR polygon.

    overlap = area(ocr ∩ bubble) / area(ocr)
    If best overlap < min_overlap → no match (returns None).
    """
    if not bubble_polygons:
        return None

    best_idx = None
    best_overlap = 0.0

    for idx, bp in enumerate(bubble_polygons):
        overlap = calculate_polygon_overlap(ocr_poly, bp)
        # Strict '>' keeps the first bubble on ties and never selects a
        # bubble with zero overlap.
        if overlap > best_overlap:
            best_overlap = overlap
            best_idx = idx

    if best_idx is not None and best_overlap >= min_overlap:
        return best_idx
    return None


# ====================== Polygon Correction with Bubbles ===================

def correct_polygon_with_bubble(ocr_polygon, bubble_polygon, strategy="hybrid"):
    """
    Correct OCR polygon using bubble polygon.

    Strategies:
    - "bubble": Use bubble polygon directly
    - "intersect": Use intersection of OCR and bubble
    - "expand": Slightly expand OCR region inside bubble
    - "hybrid": Choose based on relative sizes & intersection

    Any other strategy value falls back to the bubble polygon.

    Returns a list of int (x, y) tuples. If anything fails, the error is
    printed and ``ocr_polygon`` is returned unchanged.
    """
    try:
        ocr_shape = _as_valid_shape(ocr_polygon)
        bubble_shape = _as_valid_shape(bubble_polygon)

        # buffer(0) may turn a self-intersecting bubble into a MultiPolygon;
        # _exterior_points then uses its largest part.
        bubble_points = _exterior_points(bubble_shape)

        # ---- Strategy: hybrid ----
        if strategy == "hybrid":
            size_ratio = (
                bubble_shape.area / ocr_shape.area
                if ocr_shape.area > 0 else 999
            )
            if size_ratio > 3:
                # Bubble is much larger than OCR region: likely multi-line speech
                shrunk = bubble_shape.buffer(-5)
                # A negative buffer can erase the bubble or split it at a
                # narrow neck; in both cases the full bubble is used.
                if shrunk.is_empty or shrunk.geom_type != "Polygon":
                    return bubble_points
                return _exterior_points(shrunk)
            if size_ratio < 1.5:
                # Similar sizes: use intersection (handled below)
                strategy = "intersect"
            else:
                # Moderate difference → bubble is still safer
                return bubble_points

        # ---- Strategy: intersection region ----
        if strategy == "intersect":
            inter = ocr_shape.intersection(bubble_shape)
            if inter.is_empty or inter.area < ocr_shape.area * 0.3:
                # Intersection too small, bubble is safer
                return bubble_points
            return _exterior_points(inter)

        # ---- Strategy: expand OCR slightly toward bubble ----
        if strategy == "expand":
            expanded = ocr_shape.buffer(10)  # ~10px expansion
            clipped = expanded.intersection(bubble_shape)
            if (
                not clipped.is_empty
                and clipped.geom_type == "Polygon"
                and clipped.area > ocr_shape.area * 0.5
            ):
                return _exterior_points(clipped)
            return bubble_points

        # ---- Strategy: "bubble" and fallback for unknown strategies ----
        return bubble_points

    except Exception as e:
        print(f"⚠️ Polygon correction failed: {e}, using original OCR polygon")
        return ocr_polygon


def correct_ocr_polygons_with_bubbles(translations, bubble_polygons, strategy="hybrid"):
    """
    Correct all OCR polygons using detected bubbles.

    Each entry's polygon is read from "polygon" (or "polygons" as a
    fallback). Entries without a polygon are passed through unchanged;
    all others are shallow-copied before being modified.

    Adds:
      - "original_polygon"
      - "matched_bubble_idx"

    Returns:
      updated translations list
    """
    corrected = []
    unmatched = 0

    for t in translations:
        ocr_poly = t.get("polygon") or t.get("polygons")
        if not ocr_poly:
            corrected.append(t)
            continue

        best_idx = match_polygon_to_bubble_by_overlap(ocr_poly, bubble_polygons)

        t_copy = t.copy()
        t_copy["original_polygon"] = ocr_poly

        if best_idx is not None:
            bubble_poly = bubble_polygons[best_idx]
            t_copy["polygon"] = correct_polygon_with_bubble(ocr_poly, bubble_poly, strategy)
            t_copy["matched_bubble_idx"] = best_idx
        else:
            # No match → keep original OCR polygon
            t_copy["matched_bubble_idx"] = None
            t_copy["polygon"] = ocr_poly
            unmatched += 1

        corrected.append(t_copy)

    if unmatched:
        print(f"ℹ️ {unmatched}/{len(translations)} OCR regions had no matching bubble")

    return corrected


# ========================= Basic Polygon Utilities =======================

def shrink_or_expand_polygon(polygon, shrink_ratio=0.9):
    """
    Resize a polygon around its centroid (the mean of its vertices).

    shrink_ratio < 1 → shrink
    shrink_ratio > 1 → expand

    Returns a list of int (x, y) tuples; an empty/None polygon is returned
    unchanged.
    """
    if not polygon:
        return polygon

    ratio = shrink_ratio
    cx = sum(x for x, _ in polygon) / len(polygon)
    cy = sum(y for _, y in polygon) / len(polygon)

    return [
        (int((x - cx) * ratio + cx), int((y - cy) * ratio + cy))
        for x, y in polygon
    ]


def inpaint_polygon(img: Image.Image, polygon, mode="auto", fallback_color=(255, 255, 255)):
    """
    Return a copy of ``img`` with ``polygon`` filled with ``fallback_color``.

    ``mode`` is accepted for interface compatibility but is currently not
    used: the region is always flat-filled. The input image is not modified.
    """
    # Could use cv2.inpaint for fancy filling; for manga bubbles simple fill is OK
    img_copy = img.copy()
    draw = ImageDraw.Draw(img_copy)
    draw.polygon(_as_point_tuples(polygon), fill=fallback_color)
    return img_copy


def merge_polygons_to_convex_hull(polygons):
    """
    Merge several polygons into the convex hull of all their points.

    Returns the hull as a list of int (x, y) tuples. Returns [] when there
    are no points, or when the hull is not a polygon (all points identical
    or collinear).
    """
    points = [pt for poly in polygons for pt in poly]
    if not points:
        return []
    hull = MultiPoint(points).convex_hull
    if hull.geom_type != "Polygon":
        # Point or LineString hull: there is no exterior ring to return.
        return []
    return [(int(x), int(y)) for x, y in hull.exterior.coords[:-1]]


# ======================== Rendering / Text Drawing =======================

def render_translated_chunk(img: Image.Image, translations, font_path=None, font_scale=1.0):
    """
    Render list of translations (with 'polygon' and 'translated') onto image.

    Works on a copy of ``img``. Entries without a polygon or without
    translated text are skipped.
    """
    img_copy = img.copy()
    for entry in translations:
        polygon = entry.get("polygon") or entry.get("polygons")
        text = entry.get("translated", "")
        if polygon and text:
            img_copy = draw_translated_text_convex(
                img_copy,
                polygon,
                text,
                font_path=font_path or FONT_PATH,
                font_scale=font_scale,
            )
    return img_copy


def _draw_closed_outline(draw, polygon, color, width=3):
    """Draw ``polygon`` as a closed outline on ``draw``."""
    pts = _as_point_tuples(polygon)
    # Repeat the first point so the outline is closed.
    draw.line(pts + pts[:1], fill=color, width=width)


def draw_translated_text_convex(
    img,
    polygon_coords,
    text,
    font_path=None,
    font_scale=1.0,
    original_polygon=None,  # New: OCR polygon
    bubble_polygon=None,    # New: detected bubble polygon
):
    """
    Inpaint + draw translated text, and draw 3 debug polygons:
      - RED:   original OCR polygon
      - BLUE:  bubble polygon (matched bubble)
      - GREEN: final render polygon (slightly shrunk)

    Note: the debug outlines are drawn directly on ``img``; the fill and the
    text are drawn on a copy, which is the image returned.
    """
    if font_path is None:
        font_path = FONT_PATH

    draw = ImageDraw.Draw(img, "RGBA")

    # ---------------------------------------------------------------------
    # 1. Draw ORIGINAL OCR polygon (RED)
    # ---------------------------------------------------------------------
    if original_polygon:
        _draw_closed_outline(draw, original_polygon, (255, 50, 50, 200))

    # ---------------------------------------------------------------------
    # 2. Draw BUBBLE polygon (BLUE)
    # ---------------------------------------------------------------------
    if bubble_polygon:
        _draw_closed_outline(draw, bubble_polygon, (50, 150, 255, 200))

    # ---------------------------------------------------------------------
    # 3. Compute render polygon and draw it (GREEN)
    # ---------------------------------------------------------------------
    render_polygon = shrink_or_expand_polygon(polygon_coords, shrink_ratio=0.9)
    _draw_closed_outline(draw, render_polygon, (50, 255, 100, 220))

    # ---------------------------------------------------------------------
    # 4. Inpaint inside final render polygon
    # ---------------------------------------------------------------------
    img = inpaint_polygon(img, render_polygon, mode="auto", fallback_color=(255, 255, 255))

    # ---------------------------------------------------------------------
    # 5. Draw wrapped translated text
    # ---------------------------------------------------------------------
    draw_wrapped_text(
        img,
        render_polygon,
        text,
        font_path,
        polygon_for_size=polygon_coords,
        font_scale=font_scale,
    )

    return img


def draw_wrapped_text(img, polygon, text, font_path, polygon_for_size=None, font_scale=1.0):
    """
    Draw wrapped text centered in the polygon bounding box.

    The bounding box is taken from ``polygon_for_size`` when given, otherwise
    from ``polygon``. Draws on ``img`` in place and returns None; nothing is
    drawn when the bounding box has no width or height.
    """
    polygon_for_size = polygon_for_size or polygon
    draw = ImageDraw.Draw(img)

    xs, ys = zip(*polygon_for_size)
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    box_width = x_max - x_min
    box_height = y_max - y_min
    if box_width <= 0 or box_height <= 0:
        return

    # Rough starting size from the box dimensions and the text length.
    avg_char_width = 0.4
    estimated_size = int(min(box_height / 1.2, box_width / (len(text) * avg_char_width + 1)))
    estimated_size = max(6, estimated_size)

    # A small font_scale could truncate the size to 0, which
    # ImageFont.truetype rejects; keep it at least 1.
    font_size = max(1, int(estimated_size * font_scale))
    font = ImageFont.truetype(font_path, font_size)

    # Wrap using the width of "A" in the chosen font as the per-character width.
    max_chars = max(1, int(box_width / (font.getbbox("A")[2] + 1)))
    wrapped = textwrap.fill(text, width=max_chars)

    bbox = draw.textbbox((0, 0), wrapped, font=font)
    text_w, text_h = bbox[2] - bbox[0], bbox[3] - bbox[1]

    x = x_min + (box_width - text_w) / 2
    y = y_min + (box_height - text_h) / 2

    draw.text((x, y), wrapped, font=font, fill="black", align="center")