Spaces:

Curify
/

manga_translation

Sleeping

App Files Files Community

manga_translation / utils /ocr_utils.py

qqwjq1981

Update utils/ocr_utils.py

e67bb3b verified 24 days ago

raw

history blame contribute delete

8.71 kB

	from paddleocr import PaddleOCR
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	from scipy.spatial import ConvexHull
	from utils.azure_translate import translate_text_azure
	from math import dist
	import numpy as np
	from shapely.geometry import box as shapely_box
	from shapely.geometry import Polygon
	from shapely.ops import unary_union
	import networkx as nx
	from shapely.ops import unary_union
	from utils.bubble_detect_rtdetr import polygon_to_mask


	# Module-level PaddleOCR engine, constructed once when this module is imported.
	# Both extract_and_translate_chunk() and extract_and_translate_with_masks()
	# in this file run recognition through ocr_model.ocr().
	# NOTE(review): lang='ch' presumably selects PaddleOCR's Chinese model --
	# confirm it matches the language of the manga being processed.
	ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')


	def inflate_polygon(polygon_points, percent=0.05):
	    """
	    Inflate a polygon by a fraction of its bounding-box diagonal.

	    Args:
	        polygon_points: Sequence of (x, y) coordinates describing the polygon.
	        percent: Inflation amount expressed as a fraction of the
	            bounding-box diagonal (0.05 = 5%). With 0 the polygon is
	            buffered by a distance of zero.

	    Returns:
	        The Shapely geometry produced by ``buffer()`` on the (possibly
	        repaired) polygon.
	    """
	    poly = Polygon(polygon_points)
	    if not poly.is_valid:
	        # An invalid outline (e.g. self-intersecting) is replaced by its
	        # convex hull so the bounds and the buffer work on valid geometry.
	        poly = poly.convex_hull
	    minx, miny, maxx, maxy = poly.bounds
	    # Euclidean length of the bounding-box diagonal. The exponent
	    # operators on the two squared terms were missing in the original,
	    # which made the whole module fail to parse.
	    diagonal = ((maxx - minx) ** 2 + (maxy - miny) ** 2) ** 0.5
	    inflate_dist = diagonal * percent
	    return poly.buffer(inflate_dist)


	def group_nearby_boxes(lines, inflation_percent=0.05):
	    """
	    Group text boxes whose inflated polygons intersect.

	    Two boxes belong to the same group when their inflated outlines
	    intersect, directly or through a chain of other boxes (connected
	    components of the intersection graph). Every pair of boxes is tested,
	    so the cost grows quadratically with the number of boxes.

	    Args:
	        lines: Iterable of (polygon_points, text) tuples.
	        inflation_percent: Fraction passed to inflate_polygon() when
	            building the outlines used for the intersection test.

	    Returns:
	        List of groups, each a dict {"polygons": [...], "texts": [...]}.
	        Groups are ordered by their first member in the input; members of
	        a group keep their input order, so concatenating "texts" follows
	        the OCR order rather than the graph traversal order. Each polygon
	        is the list of exterior coordinates of the original (non-inflated)
	        Shapely polygon.
	    """
	    # Materialise once so generators are accepted as well as lists.
	    lines = list(lines)
	    count = len(lines)

	    inflated_polys = []
	    original_polys = []
	    texts = []
	    for poly_pts, text in lines:
	        inflated_polys.append(inflate_polygon(poly_pts, percent=inflation_percent))
	        original_polys.append(Polygon(poly_pts))
	        texts.append(text)

	    # Build the connectivity graph: an edge for every intersecting pair.
	    neighbours = [[] for _ in range(count)]
	    for i in range(count):
	        for j in range(i + 1, count):
	            if inflated_polys[i].intersects(inflated_polys[j]):
	                neighbours[i].append(j)
	                neighbours[j].append(i)

	    # Connected components via an explicit stack. A recursive DFS would hit
	    # Python's recursion limit on a long chain of connected boxes.
	    seen = [False] * count
	    grouped = []
	    for start in range(count):
	        if seen[start]:
	            continue
	        seen[start] = True
	        stack = [start]
	        members = []
	        while stack:
	            node = stack.pop()
	            members.append(node)
	            for other in neighbours[node]:
	                if not seen[other]:
	                    seen[other] = True
	                    stack.append(other)

	        # Restore input order so the merged text does not depend on the
	        # order in which the traversal happened to reach each box.
	        members.sort()
	        grouped.append({
	            "polygons": [list(original_polys[i].exterior.coords) for i in members],
	            "texts": [texts[i] for i in members],
	        })

	    return grouped


	def extract_and_translate_chunk(image: Image.Image):
	    """
	    Run OCR over the whole image, group nearby text boxes and translate
	    each group.

	    Args:
	        image: PIL image to process; it is converted with ``np.array``
	            before being handed to ``ocr_model.ocr``.

	    Returns:
	        List of dicts with keys:
	            - "original": concatenated OCR text of the group
	            - "translated": result of translate_text_azure(), or "" when
	              the translation call raised
	            - "polygon": convex hull of all box corners in the group, as a
	              list of (x, y) int tuples
	        An empty list is returned when the OCR result is missing, is not a
	        dict, or its "rec_polys" / "rec_texts" entries are empty or differ
	        in length.
	    """
	    # Local import, in the style already used elsewhere in this module.
	    from scipy.spatial import QhullError

	    np_img = np.array(image)
	    results = ocr_model.ocr(np_img)

	    if not results or not isinstance(results[0], dict):
	        return []

	    result_dict = results[0]
	    polygons = result_dict.get("rec_polys")
	    texts = result_dict.get("rec_texts")

	    # Explicit None / length checks: truth-testing would raise if the OCR
	    # backend handed back a multi-element NumPy array instead of a list.
	    if polygons is None or texts is None:
	        return []
	    if len(polygons) == 0 or len(polygons) != len(texts):
	        return []

	    lines = [
	        ([(int(x), int(y)) for x, y in poly], text)
	        for poly, text in zip(polygons, texts)
	    ]

	    print("🔍 OCR Raw Output:", lines)

	    translations = []

	    for group in group_nearby_boxes(lines):
	        merged_text = "".join(group["texts"]).strip()
	        if not merged_text:
	            continue

	        # Build the hull before translating so a group that is going to be
	        # discarded does not cost a translation call.
	        all_points = np.array([pt for polygon in group["polygons"] for pt in polygon])
	        if len(all_points) < 3:
	            continue

	        try:
	            hull_indices = ConvexHull(all_points).vertices
	        except QhullError as e:
	            # Collinear or coincident corners (a zero-area box) cannot form
	            # a hull; skip this group instead of aborting the whole page.
	            print("⚠️ Skipping degenerate text group:", e)
	            continue
	        hull = [tuple(map(int, all_points[i])) for i in hull_indices]

	        try:
	            translated = translate_text_azure(merged_text)
	        except Exception as e:
	            # Deliberate best-effort: one failed request must not lose the
	            # rest of the page.
	            # NOTE(review): extract_and_translate_with_masks() falls back to
	            # the source text here instead of "" -- confirm which one the
	            # renderer expects.
	            print("⚠️ Translation failed:", e)
	            translated = ""

	        translations.append({
	            "original": merged_text,
	            "translated": translated,
	            "polygon": hull,
	        })

	    return translations

	def extract_and_translate_with_masks(
	    full_img,
	    interior_polygons,
	    grouping_inflation=0.05,
	    final_inflation=0
	):
	    """
	    OCR ONLY inside bubble interior polygons, with grouping and inflation.

	    For every bubble polygon the image is whitened outside the bubble, OCR
	    is run on the result, nearby text boxes are grouped, and each group is
	    translated and given an (optionally inflated) convex-hull outline.

	    Args:
	        full_img: PIL Image (anything ``np.array`` accepts); both
	            colour (H, W, C) and grayscale (H, W) arrays are handled.
	        interior_polygons: List of bubble interior polygons [(x, y)];
	            ``None`` or empty entries are skipped.
	        grouping_inflation: Fraction used for grouping OCR boxes.
	        final_inflation: Fraction used to enlarge the final rendering polygon.

	    Returns:
	        List of dict:
	            - original: concatenated OCR text of the group
	            - translated: translation, or the original text when the
	              translation call raised
	            - polygon: inflated hull as a list of (x, y) int tuples
	            - matched_bubble_idx: index of the bubble in interior_polygons
	            - num_text_boxes: number of OCR boxes merged into the group
	    """
	    # Local import, in the style already used elsewhere in this module.
	    from scipy.spatial import QhullError

	    np_img = np.array(full_img)
	    H, W = np_img.shape[:2]

	    translations = []

	    for idx, poly in enumerate(interior_polygons):
	        # Explicit checks: truth-testing a NumPy-array polygon would raise.
	        if poly is None or len(poly) == 0:
	            continue

	        # ----------------------------------------------------
	        # 1) Mask the bubble region (white outside bubble)
	        # ----------------------------------------------------
	        # Assumes polygon_to_mask returns an (H, W) array holding 255 inside
	        # the polygon, as the comparison below implies -- TODO confirm.
	        mask = polygon_to_mask((W, H), poly)
	        if np_img.ndim == 3:
	            inside = mask[..., None] == 255
	        else:
	            # Grayscale image: adding a channel axis to the mask would
	            # mis-broadcast against the 2-D pixel array.
	            inside = mask == 255
	        bubble_img = np.where(inside, np_img, 255).astype(np.uint8)

	        # ----------------------------------------------------
	        # 2) OCR inside bubble
	        # ----------------------------------------------------
	        results = ocr_model.ocr(bubble_img)
	        if not results or not isinstance(results[0], dict):
	            continue

	        res = results[0]
	        polys = res.get("rec_polys")
	        texts = res.get("rec_texts")
	        if polys is None or texts is None or len(polys) == 0 or len(texts) == 0:
	            continue

	        # The masked image keeps the full-page size, so OCR coordinates are
	        # already in full-image space; only an int cast is needed.
	        lines = []
	        for poly_coords, text in zip(polys, texts):
	            text_stripped = text.strip()
	            if text_stripped:
	                lines.append(([(int(x), int(y)) for x, y in poly_coords], text_stripped))

	        if not lines:
	            continue

	        print(f"🔍 Bubble {idx}: Found {len(lines)} text boxes")

	        # ----------------------------------------------------
	        # 3) Group nearby OCR text boxes
	        # ----------------------------------------------------
	        grouped = group_nearby_boxes(lines, inflation_percent=grouping_inflation)
	        print(f" → Grouped into {len(grouped)} groups")

	        # ----------------------------------------------------
	        # 4) Process each group → merge text + hull + inflation
	        # ----------------------------------------------------
	        for group in grouped:
	            group_polys = group["polygons"]
	            group_texts = group["texts"]

	            merged_text = "".join(group_texts).strip()
	            if not merged_text:
	                continue

	            # Build the hull before translating so a group that is going
	            # to be discarded does not cost a translation call.
	            all_points = np.array([pt for polygon in group_polys for pt in polygon])
	            if len(all_points) < 3:
	                continue

	            try:
	                hull_idx = ConvexHull(all_points).vertices
	            except QhullError as e:
	                # Collinear or coincident corners cannot form a hull; skip
	                # this group instead of aborting every remaining bubble.
	                print(f"⚠️ Skipping degenerate text group: {e}")
	                continue
	            hull_coords = [tuple(map(int, all_points[i])) for i in hull_idx]

	            try:
	                translated = translate_text_azure(merged_text)
	            except Exception as e:
	                # Deliberate best-effort: keep the source text so the
	                # bubble still has something to render.
	                print(f"⚠️ Translation failed: {e}")
	                translated = merged_text

	            # ------------------------------------------------
	            # 5) Inflate using EXISTING inflate_polygon()
	            # ------------------------------------------------
	            inflated_poly = inflate_polygon(hull_coords, percent=final_inflation)

	            # A non-Polygon result (e.g. a multi-part geometry) is reduced
	            # to its convex hull so there is a single exterior ring.
	            if inflated_poly.geom_type == "Polygon":
	                outline = inflated_poly
	            else:
	                outline = inflated_poly.convex_hull
	            # Drop the last coordinate: it repeats the first to close the ring.
	            final_coords = [(int(x), int(y)) for x, y in outline.exterior.coords[:-1]]

	            translations.append({
	                "original": merged_text,
	                "translated": translated,
	                "polygon": final_coords,
	                "matched_bubble_idx": idx,
	                "num_text_boxes": len(group_texts),
	            })

	    print(f"✅ Total translations extracted: {len(translations)}")
	    return translations