# OCR + translation helpers: detect Chinese text with PaddleOCR, group nearby
# text lines into regions, and translate each merged region via Azure Translator.
from paddleocr import PaddleOCR
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from scipy.spatial import ConvexHull
from utils.azure_translate import translate_text_azure
from math import dist
import numpy as np
from shapely.geometry import box as shapely_box
from shapely.geometry import Polygon
from shapely.ops import unary_union
import networkx as nx
from shapely.ops import unary_union
# Module-level OCR engine (Chinese, with text-line orientation detection);
# created once at import time because PaddleOCR model loading is expensive.
ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')
def inflate_polygon(polygon_points, percent=0.05):
    """Return a shapely polygon grown outward by a fraction of its size.

    The buffer distance is ``percent`` of the polygon's bounding-box
    diagonal, so the inflation scales with the box instead of being a
    fixed pixel amount.
    """
    shape = Polygon(polygon_points)
    # Self-intersecting OCR quads are repaired via their convex hull.
    if not shape.is_valid:
        shape = shape.convex_hull
    min_x, min_y, max_x, max_y = shape.bounds
    diagonal = dist((min_x, min_y), (max_x, max_y))
    return shape.buffer(diagonal * percent)
def group_nearby_boxes(lines, inflation_percent=0.05):
    """Cluster OCR lines whose inflated polygons touch or overlap.

    Each line's polygon is inflated by ``inflation_percent`` of its diagonal
    (see ``inflate_polygon``); two lines belong to the same region when their
    inflated polygons intersect. Connected components of that proximity graph
    become the output groups.

    Args:
        lines: iterable of ``(polygon_points, text)`` pairs.
        inflation_percent: fraction of each polygon's bounding-box diagonal
            used as the buffer distance for the proximity test.

    Returns:
        List of dicts with keys ``"polygons"`` (exterior coordinates of the
        original polygons) and ``"texts"``, members kept in OCR input order.
    """
    from collections import defaultdict

    inflated_polys = []
    original_polys = []
    texts = []
    for poly_pts, text in lines:
        inflated_polys.append(inflate_polygon(poly_pts, percent=inflation_percent))
        original_polys.append(Polygon(poly_pts))
        texts.append(text)
    n = len(texts)

    # Build the proximity graph: an edge means the inflated polygons touch.
    # O(n^2) pairwise test — fine for the few dozen lines OCR yields per image.
    adjacency = defaultdict(set)
    for i in range(n):
        for j in range(i + 1, n):
            if inflated_polys[i].intersects(inflated_polys[j]):
                adjacency[i].add(j)
                adjacency[j].add(i)

    # Iterative DFS: the previous recursive version could exceed Python's
    # recursion limit on a long chain of mutually-overlapping boxes.
    visited = [False] * n
    components = []
    for start in range(n):
        if visited[start]:
            continue
        component = []
        stack = [start]
        while stack:
            node = stack.pop()
            if visited[node]:
                continue
            visited[node] = True
            component.append(node)
            stack.extend(nb for nb in adjacency[node] if not visited[nb])
        # Sort so downstream text merging follows the OCR reading order
        # instead of the arbitrary DFS discovery order.
        components.append(sorted(component))

    return [
        {
            "polygons": [list(original_polys[i].exterior.coords) for i in comp],
            "texts": [texts[i] for i in comp],
        }
        for comp in components
    ]
def extract_and_translate_chunk(image: Image.Image):
    """Run OCR on *image*, group nearby lines, and translate each group.

    Args:
        image: PIL image of the page/chunk to process.

    Returns:
        List of dicts ``{"original", "translated", "polygon"}`` where
        ``polygon`` is the convex hull (integer pixel coordinates) of the
        group's merged boxes. Returns ``[]`` when OCR yields nothing usable.
    """
    from scipy.spatial import QhullError

    np_img = np.array(image)
    results = ocr_model.ocr(np_img)
    if not results or not isinstance(results[0], dict):
        return []
    result_dict = results[0]
    rec_polys = result_dict.get("rec_polys", [])
    rec_texts = result_dict.get("rec_texts", [])
    # Mismatched lengths would silently misalign text with its box — bail out.
    if not rec_polys or not rec_texts or len(rec_polys) != len(rec_texts):
        return []
    lines = list(zip([[(int(x), int(y)) for x, y in poly] for poly in rec_polys], rec_texts))
    print("🔍 OCR Raw Output:", lines)

    translations = []
    for group in group_nearby_boxes(lines):
        merged_text = "".join(group["texts"]).strip()
        if not merged_text:
            continue
        try:
            translated = translate_text_azure(merged_text)
        except Exception as e:
            # Best-effort: keep the OCR result even when translation fails.
            print("⚠️ Translation failed:", e)
            translated = ""
        all_points = np.array([pt for polygon in group["polygons"] for pt in polygon])
        if len(all_points) < 3:
            continue
        try:
            hull_indices = ConvexHull(all_points).vertices
        except QhullError:
            # Degenerate point sets (e.g. all collinear) have no 2-D hull;
            # skip the group rather than crash the whole chunk.
            continue
        hull = [tuple(map(int, all_points[i])) for i in hull_indices]
        translations.append({
            "original": merged_text,
            "translated": translated,
            "polygon": hull,
        })
    return translations