File size: 3,603 Bytes
2c1aa80
 
7286949
524a697
5846d46
a479a6f
 
 
 
 
 
d5f595e
2c1aa80
 
 
3fef120
d5f595e
 
 
 
 
 
 
 
f8d17f8
d5f595e
 
2c1aa80
a479a6f
d5f595e
 
 
2c1aa80
d5f595e
 
 
 
 
 
f15a377
d5f595e
 
a479a6f
 
d5f595e
 
 
f15a377
d5f595e
 
a479a6f
d5f595e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f15a377
 
 
f8d17f8
d5f595e
2c1aa80
 
 
 
c98b84d
524a697
 
 
 
 
 
 
 
c98b84d
 
524a697
2c1aa80
 
 
 
c98b84d
f15a377
2c1aa80
 
f15a377
2c1aa80
f15a377
2c1aa80
 
 
 
 
 
 
 
 
 
 
f15a377
2c1aa80
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from paddleocr import PaddleOCR
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from scipy.spatial import ConvexHull
from utils.azure_translate import translate_text_azure
from math import dist
import numpy as np
from shapely.geometry import box as shapely_box
from shapely.geometry import Polygon
from shapely.ops import unary_union
import networkx as nx
from shapely.ops import unary_union

# Module-level singleton: the PaddleOCR model is loaded once at import time
# (model loading is expensive).  lang='ch' selects the Chinese recognition
# model; textline orientation detection is enabled.
ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')


def inflate_polygon(polygon_points, percent=0.05):
    """Return *polygon_points* buffered outward by *percent* of its
    bounding-box diagonal.

    Invalid (e.g. self-intersecting) polygons are replaced by their
    convex hull before buffering.
    """
    shape = Polygon(polygon_points)
    if not shape.is_valid:
        shape = shape.convex_hull
    minx, miny, maxx, maxy = shape.bounds
    width = maxx - minx
    height = maxy - miny
    diagonal = (width * width + height * height) ** 0.5
    return shape.buffer(diagonal * percent)

def group_nearby_boxes(lines, inflation_percent=0.05):
    """Cluster OCR line boxes whose inflated outlines touch or overlap.

    Each element of *lines* is a ``(polygon_points, text)`` pair.  Every
    polygon is inflated by *inflation_percent* of its bounding-box
    diagonal; boxes whose inflated polygons intersect are merged into one
    group, transitively (connected components of the intersection graph).

    Returns a list of dicts:
        "polygons" -- exterior coordinate lists of the ORIGINAL (uninflated)
                      polygons in the group
        "texts"    -- the corresponding texts, in original OCR order
    """
    from collections import defaultdict

    n = len(lines)
    inflated_polys = []
    original_polys = []
    texts = []

    for poly_pts, text in lines:
        inflated_polys.append(inflate_polygon(poly_pts, percent=inflation_percent))
        original_polys.append(Polygon(poly_pts))
        texts.append(text)

    # Build connectivity graph (O(n^2) pairwise intersection tests).
    adjacency = defaultdict(set)
    for i in range(n):
        for j in range(i + 1, n):
            if inflated_polys[i].intersects(inflated_polys[j]):
                adjacency[i].add(j)
                adjacency[j].add(i)

    # Iterative DFS for connected components.  The previous recursive
    # implementation could exceed Python's recursion limit on large,
    # densely connected groups.
    visited = [False] * n
    groups = []
    for start in range(n):
        if visited[start]:
            continue
        visited[start] = True
        stack = [start]
        component = []
        while stack:
            i = stack.pop()
            component.append(i)
            for neighbor in adjacency[i]:
                if not visited[neighbor]:
                    visited[neighbor] = True
                    stack.append(neighbor)
        # Sort so texts/polygons come out in original OCR order rather
        # than in traversal order (traversal order is arbitrary because
        # it follows set iteration).
        component.sort()
        groups.append(component)

    # Construct output groups.
    grouped = []
    for group in groups:
        grouped.append({
            "polygons": [list(original_polys[i].exterior.coords) for i in group],
            "texts": [texts[i] for i in group],
        })

    return grouped

def extract_and_translate_chunk(image: Image.Image):
    """Run OCR on *image*, group nearby text lines, and translate each group.

    Returns a list of dicts, one per text group:
        "original"   -- the merged OCR text of the group
        "translated" -- its Azure translation ("" if translation failed)
        "polygon"    -- convex hull, as a list of (x, y) int tuples,
                        enclosing all boxes of the group
    Returns [] when OCR produces no usable result.
    """
    # QhullError is raised by ConvexHull for degenerate inputs; imported
    # locally to keep the module's top-level imports untouched.
    from scipy.spatial import QhullError

    np_img = np.array(image)
    results = ocr_model.ocr(np_img)

    # Defensive: this module expects the first element of the OCR result
    # to be a dict of recognition outputs; bail out on anything else.
    if not results or not isinstance(results[0], dict):
        return []

    result_dict = results[0]
    polygons = result_dict.get("rec_polys", [])
    texts = result_dict.get("rec_texts", [])

    if not polygons or not texts or len(polygons) != len(texts):
        return []

    # Normalize polygon coordinates to plain int tuples.
    lines = list(zip([[(int(x), int(y)) for x, y in poly] for poly in polygons], texts))

    print("🔍 OCR Raw Output:", lines)

    grouped = group_nearby_boxes(lines)
    translations = []

    for group in grouped:
        # Use distinct names here: previously this loop shadowed the
        # outer `polygons`/`texts` variables.
        group_polys = group["polygons"]
        group_texts = group["texts"]

        merged_text = "".join(group_texts).strip()
        if not merged_text:
            continue

        try:
            translated = translate_text_azure(merged_text)
        except Exception as e:
            # Best-effort: keep the OCR result even if translation fails.
            print("⚠️ Translation failed:", e)
            translated = ""

        all_points = np.array([pt for polygon in group_polys for pt in polygon])
        if len(all_points) < 3:
            continue

        try:
            hull_indices = ConvexHull(all_points).vertices
        except QhullError:
            # Collinear or duplicate points have no 2-D convex hull;
            # skip this group rather than crash the whole chunk.
            continue
        hull = [tuple(map(int, all_points[i])) for i in hull_indices]

        translations.append({
            "original": merged_text,
            "translated": translated,
            "polygon": hull
        })

    return translations