File size: 8,705 Bytes
2c1aa80
 
7286949
524a697
5846d46
a479a6f
 
 
 
 
 
d5f595e
b164a37
 
2c1aa80
 
 
3fef120
d5f595e
727f54e
 
 
 
 
 
 
 
 
 
d5f595e
 
 
 
 
 
 
f8d17f8
727f54e
d5f595e
727f54e
 
 
 
 
 
 
 
 
 
d5f595e
2c1aa80
a479a6f
d5f595e
 
 
2c1aa80
d5f595e
 
 
 
 
 
f15a377
d5f595e
 
a479a6f
 
d5f595e
 
 
f15a377
d5f595e
 
a479a6f
d5f595e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f15a377
 
 
f8d17f8
d5f595e
2c1aa80
727f54e
2c1aa80
727f54e
 
 
 
2c1aa80
 
c98b84d
524a697
 
 
 
 
 
 
 
c98b84d
 
524a697
2c1aa80
 
 
 
c98b84d
f15a377
2c1aa80
 
f15a377
2c1aa80
f15a377
2c1aa80
 
 
 
 
 
 
 
 
 
 
f15a377
2c1aa80
 
 
 
 
 
 
 
 
 
b83f54e
 
727f54e
 
 
 
e67bb3b
727f54e
b83f54e
727f54e
 
 
 
 
 
 
 
 
 
 
 
 
 
b83f54e
 
 
 
 
 
 
 
 
 
727f54e
 
 
b83f54e
 
 
727f54e
 
 
b83f54e
 
 
 
 
 
 
727f54e
 
b83f54e
727f54e
 
b83f54e
727f54e
 
b83f54e
 
 
727f54e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b83f54e
 
 
727f54e
 
b83f54e
 
727f54e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b83f54e
 
 
727f54e
 
 
b83f54e
 
727f54e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
from paddleocr import PaddleOCR
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from scipy.spatial import ConvexHull
from utils.azure_translate import translate_text_azure
from math import dist
import numpy as np
from shapely.geometry import box as shapely_box
from shapely.geometry import Polygon
from shapely.ops import unary_union
import networkx as nx
from shapely.ops import unary_union
from utils.bubble_detect_rtdetr import polygon_to_mask


# Shared PaddleOCR engine, constructed once when this module is imported and
# reused by every OCR call in this file (ocr_model.ocr(...)).
# It is configured with text-line orientation handling enabled and lang='ch'
# (presumably the Chinese recognition model — confirm against PaddleOCR docs).
ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')


def inflate_polygon(polygon_points, percent=0.05):
    """
    Grow a polygon outward by a fraction of its bounding-box diagonal.

    Args:
        polygon_points: Sequence of (x, y) vertices.
        percent: Fraction of the bounding-box diagonal used as the buffer
            distance (0.05 means 5%).

    Returns:
        The Shapely geometry obtained by buffering the polygon by that
        distance.
    """
    shape = Polygon(polygon_points)
    # An invalid outline is swapped for its convex hull before it is
    # measured and buffered.
    shape = shape if shape.is_valid else shape.convex_hull

    left, bottom, right, top = shape.bounds
    width = right - left
    height = top - bottom
    diagonal = (width**2 + height**2)**0.5

    return shape.buffer(diagonal * percent)


def group_nearby_boxes(lines, inflation_percent=0.05):
    """
    Group nearby text boxes by checking if their inflated polygons intersect.

    Two boxes belong to the same group when their inflated polygons intersect,
    either directly or through a chain of other intersecting boxes (connected
    components of the intersection graph).

    Args:
        lines: List of (polygon_points, text) tuples
        inflation_percent: How much to inflate polygons for grouping detection
            (passed to inflate_polygon as ``percent``)

    Returns:
        List of groups, each containing {"polygons": [...], "texts": [...]}.
        Each polygon is the exterior coordinate list of the Shapely polygon
        built from the input points. Groups are ordered by their first member
        in ``lines``; inside a group, polygons and texts keep the order in
        which they appear in ``lines``.
    """
    n = len(lines)
    inflated_polys = []
    original_polys = []
    texts = []

    for poly_pts, text in lines:
        inflated_polys.append(inflate_polygon(poly_pts, percent=inflation_percent))
        original_polys.append(Polygon(poly_pts))
        texts.append(text)

    # Build the connectivity graph: neighbors[i] lists every box whose
    # inflated polygon intersects box i's inflated polygon.
    neighbors = [[] for _ in range(n)]
    for i, poly_i in enumerate(inflated_polys):
        for j in range(i + 1, n):
            if poly_i.intersects(inflated_polys[j]):
                neighbors[i].append(j)
                neighbors[j].append(i)

    # Find connected components with an explicit stack. A recursive DFS would
    # raise RecursionError once a single component is larger than the
    # interpreter's recursion limit.
    visited = [False] * n
    components = []
    for start in range(n):
        if visited[start]:
            continue
        visited[start] = True
        pending = [start]
        members = []
        while pending:
            node = pending.pop()
            members.append(node)
            for neighbor in neighbors[node]:
                if not visited[neighbor]:
                    visited[neighbor] = True
                    pending.append(neighbor)
        # Restore input order so merged text does not depend on the order in
        # which the graph happened to be traversed.
        members.sort()
        components.append(members)

    # Construct output groups
    return [
        {
            "polygons": [list(original_polys[i].exterior.coords) for i in members],
            "texts": [texts[i] for i in members],
        }
        for members in components
    ]


def extract_and_translate_chunk(image: Image.Image):
    """
    Extract text from entire image and translate.
    Groups nearby text boxes before translation.

    Args:
        image: PIL image to run OCR on.

    Returns:
        List of dicts, one per non-empty text group:
        - original: texts of the group concatenated and stripped
        - translated: result of translate_text_azure, or "" if it raised
        - polygon: convex hull of all box corners in the group, as a list
          of (int, int) tuples
        An empty list is returned when OCR yields no result, the first result
        is not a dict, or the polygon and text lists are empty or differ in
        length.
    """
    np_img = np.array(image)
    results = ocr_model.ocr(np_img)

    if not results or not isinstance(results[0], dict):
        return []

    result_dict = results[0]
    ocr_polys = result_dict.get("rec_polys", [])
    ocr_texts = result_dict.get("rec_texts", [])

    # Polygons and texts are paired by position, so a length mismatch means
    # the pairing cannot be trusted.
    if not ocr_polys or not ocr_texts or len(ocr_polys) != len(ocr_texts):
        return []

    lines = [
        ([(int(x), int(y)) for x, y in poly], text)
        for poly, text in zip(ocr_polys, ocr_texts)
    ]

    print("🔍 OCR Raw Output:", lines)

    grouped = group_nearby_boxes(lines)
    translations = []

    for group in grouped:
        group_polys = group["polygons"]
        group_texts = group["texts"]

        merged_text = "".join(group_texts).strip()
        if not merged_text:
            continue

        # Best effort: a failed translation must not abort the whole image,
        # so the group is kept with an empty translation.
        try:
            translated = translate_text_azure(merged_text)
        except Exception as e:
            print("⚠️ Translation failed:", e)
            translated = ""

        all_points = np.array([pt for polygon in group_polys for pt in polygon])
        if len(all_points) < 3:
            continue

        # Outline the whole group with the convex hull of every box corner.
        hull_indices = ConvexHull(all_points).vertices
        hull = [tuple(map(int, all_points[i])) for i in hull_indices]

        translations.append({
            "original": merged_text,
            "translated": translated,
            "polygon": hull
        })

    return translations

def extract_and_translate_with_masks(
    full_img,
    interior_polygons,
    grouping_inflation=0.05,
    final_inflation=0
):
    """
    OCR ONLY inside bubble interior polygons, with grouping and inflation.

    For every non-empty polygon the image is whited out wherever the mask
    from polygon_to_mask is not 255, OCR runs on that masked image, nearby
    text boxes are grouped, each group is translated, and the group's convex
    hull is inflated to form the output polygon.

    Args:
        full_img: PIL Image
        interior_polygons: List of bubble interior polygons [(x,y)]
        grouping_inflation: % used for grouping OCR boxes
        final_inflation: % used to enlarge final rendering polygon

    Returns:
        List of dict:
        - original: merged, stripped text of the group
        - translated: result of translate_text_azure, or the original text
          if translation raised
        - polygon (inflated hull) as a list of (int, int) tuples
        - matched_bubble_idx: index of the polygon in interior_polygons
        - num_text_boxes: number of OCR boxes merged into the group
    """
    np_img = np.array(full_img)
    H, W = np_img.shape[:2]

    translations = []

    for idx, poly in enumerate(interior_polygons):
        if not poly:
            continue

        # ----------------------------------------------------
        # 1) Mask the bubble region (white outside bubble)
        # ----------------------------------------------------
        # Assumes polygon_to_mask returns an (H, W) array holding 255 inside
        # the polygon — TODO confirm against utils.bubble_detect_rtdetr.
        mask = polygon_to_mask((W, H), poly)
        bubble_img = np.where(mask[..., None] == 255, np_img, 255).astype(np.uint8)

        # ----------------------------------------------------
        # 2) OCR inside bubble
        # ----------------------------------------------------
        results = ocr_model.ocr(bubble_img)
        if not results or not isinstance(results[0], dict):
            continue

        res = results[0]
        polys = res.get("rec_polys", [])
        texts = res.get("rec_texts", [])
        # Polygons and texts are paired by position; skip the bubble when the
        # lists are empty or differ in length, since zip() would silently
        # mis-pair them.
        if not polys or not texts or len(polys) != len(texts):
            continue

        # The masked image is derived from the full image without cropping,
        # so OCR coordinates are used as-is; only round them to ints and drop
        # boxes whose text is blank.
        lines = []
        for poly_coords, text in zip(polys, texts):
            text_stripped = text.strip()
            if not text_stripped:
                continue

            poly_global = [(int(x), int(y)) for x, y in poly_coords]
            lines.append((poly_global, text_stripped))

        if not lines:
            continue

        print(f"🔍 Bubble {idx}: Found {len(lines)} text boxes")

        # ----------------------------------------------------
        # 3) Group nearby OCR text boxes
        # ----------------------------------------------------
        grouped = group_nearby_boxes(lines, inflation_percent=grouping_inflation)
        print(f"   → Grouped into {len(grouped)} groups")

        # ----------------------------------------------------
        # 4) Process each group → merge text + hull + inflation
        # ----------------------------------------------------
        for group in grouped:
            group_polys = group["polygons"]
            group_texts = group["texts"]

            merged_text = "".join(group_texts).strip()
            if not merged_text:
                continue

            # Best effort: if translation fails, keep the group and fall back
            # to the untranslated text.
            try:
                translated = translate_text_azure(merged_text)
            except Exception as e:
                print(f"⚠️ Translation failed: {e}")
                translated = merged_text

            # Get all points in the group boxes
            all_points = np.array([pt for polygon in group_polys for pt in polygon])
            if len(all_points) < 3:
                continue

            hull_idx = ConvexHull(all_points).vertices
            hull_coords = [tuple(map(int, all_points[i])) for i in hull_idx]

            # ------------------------------------------------
            # 5) Inflate using EXISTING inflate_polygon()
            # ------------------------------------------------
            inflated_poly = inflate_polygon(hull_coords, percent=final_inflation)

            # Convert back to a list of coords; [:-1] drops the last exterior
            # coordinate.
            if inflated_poly.geom_type == "Polygon":
                final_coords = [(int(x), int(y)) for x, y in inflated_poly.exterior.coords[:-1]]
            else:
                # Fallback for any non-Polygon result: use its convex hull
                final_coords = [
                    (int(x), int(y)) for x, y in inflated_poly.convex_hull.exterior.coords[:-1]
                ]

            translations.append({
                "original": merged_text,
                "translated": translated,
                "polygon": final_coords,
                "matched_bubble_idx": idx,
                "num_text_boxes": len(group_texts),
            })

    print(f"✅ Total translations extracted: {len(translations)}")
    return translations