Spaces:
Running
Running
| from paddleocr import PaddleOCR | |
| import numpy as np | |
| from PIL import Image, ImageDraw, ImageFont | |
| from scipy.spatial import ConvexHull | |
| from utils.azure_translate import translate_text_azure | |
| from math import dist | |
| import numpy as np | |
| from shapely.geometry import box as shapely_box | |
| from shapely.geometry import Polygon | |
| from shapely.ops import unary_union | |
| import networkx as nx | |
| from shapely.ops import unary_union | |
# Module-level OCR singleton: Chinese language model with text-line
# orientation detection enabled. Instantiated once at import time because
# PaddleOCR model loading is expensive.
ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')
def inflate_polygon(polygon_points, percent=0.05):
    """Return *polygon_points* buffered outward by ``percent`` of its bounding-box diagonal.

    An invalid (e.g. self-intersecting) ring is first replaced by its convex
    hull so the buffer operation is well defined.
    """
    shape = Polygon(polygon_points)
    if not shape.is_valid:
        # Degenerate/self-intersecting rings can't be buffered reliably.
        shape = shape.convex_hull
    minx, miny, maxx, maxy = shape.bounds
    # Buffer distance scales with the shape's size via its bbox diagonal.
    return shape.buffer(dist((minx, miny), (maxx, maxy)) * percent)
def group_nearby_boxes(lines, inflation_percent=0.05):
    """Cluster OCR lines whose inflated polygons touch or overlap.

    Args:
        lines: sequence of ``(polygon_points, text)`` pairs, in OCR output
            (reading) order.
        inflation_percent: fraction of each polygon's bounding-box diagonal
            used as the outward buffer before testing adjacency.

    Returns:
        A list of dicts, one per connected component, each with keys
        ``"polygons"`` (exterior coords of the original, un-inflated member
        polygons) and ``"texts"`` (member texts), both in original line order.
    """
    from collections import defaultdict

    n = len(lines)
    inflated_polys = []
    original_polys = []
    texts = []
    for poly_pts, text in lines:
        inflated_polys.append(inflate_polygon(poly_pts, percent=inflation_percent))
        original_polys.append(Polygon(poly_pts))
        texts.append(text)

    # Build connectivity graph: an edge means the inflated shapes intersect.
    # O(n^2) pairwise test — acceptable for per-image line counts.
    adjacency = defaultdict(set)
    for i in range(n):
        for j in range(i + 1, n):
            if inflated_polys[i].intersects(inflated_polys[j]):
                adjacency[i].add(j)
                adjacency[j].add(i)

    # Iterative DFS over connected components. (A recursive DFS here could
    # hit Python's recursion limit on a large, densely connected page.)
    visited = [False] * n
    groups = []
    for start in range(n):
        if visited[start]:
            continue
        visited[start] = True
        stack = [start]
        group = []
        while stack:
            node = stack.pop()
            group.append(node)
            for neighbor in adjacency[node]:
                if not visited[neighbor]:
                    visited[neighbor] = True
                    stack.append(neighbor)
        # Restore OCR reading order within the group so downstream text
        # merging concatenates lines in the order the OCR engine emitted them
        # (traversal order is hash-dependent and not meaningful).
        group.sort()
        groups.append(group)

    # Construct output groups
    grouped = []
    for group in groups:
        grouped.append({
            "polygons": [list(original_polys[i].exterior.coords) for i in group],
            "texts": [texts[i] for i in group],
        })
    return grouped
def extract_and_translate_chunk(image: Image.Image):
    """Run OCR on *image*, cluster nearby lines, and translate each cluster.

    Returns:
        A list of dicts with keys ``"original"`` (merged OCR text),
        ``"translated"`` (Azure translation, ``""`` on failure), and
        ``"polygon"`` (convex-hull vertices as int tuples enclosing the
        group). Returns ``[]`` when OCR yields nothing usable.
    """
    from scipy.spatial import QhullError

    np_img = np.array(image)
    results = ocr_model.ocr(np_img)
    # New-style PaddleOCR returns a list whose first element is a dict of
    # parallel result arrays; anything else means no usable output.
    if not results or not isinstance(results[0], dict):
        return []
    result_dict = results[0]
    polygons = result_dict.get("rec_polys", [])
    texts = result_dict.get("rec_texts", [])
    if not polygons or not texts or len(polygons) != len(texts):
        return []
    # Normalize polygon vertices to plain int tuples and pair with texts.
    lines = list(zip([[(int(x), int(y)) for x, y in poly] for poly in polygons], texts))
    print("🔍 OCR Raw Output:", lines)
    grouped = group_nearby_boxes(lines)
    translations = []
    for group in grouped:
        # Distinct names: do not shadow the outer `polygons`/`texts` arrays.
        group_polygons = group["polygons"]
        group_texts = group["texts"]
        merged_text = "".join(group_texts).strip()
        if not merged_text:
            continue
        try:
            translated = translate_text_azure(merged_text)
        except Exception as e:
            # Best-effort: keep the OCR result even when translation fails.
            print("⚠️ Translation failed:", e)
            translated = ""
        all_points = np.array([pt for polygon in group_polygons for pt in polygon])
        if len(all_points) < 3:
            continue
        try:
            hull_indices = ConvexHull(all_points).vertices
        except QhullError:
            # Degenerate input (e.g. all points collinear) has no 2-D hull;
            # skip this group rather than crash the whole chunk.
            continue
        hull = [tuple(map(int, all_points[i])) for i in hull_indices]
        translations.append({
            "original": merged_text,
            "translated": translated,
            "polygon": hull,
        })
    return translations