Spaces:
Running
Running
File size: 8,705 Bytes
2c1aa80 7286949 524a697 5846d46 a479a6f d5f595e b164a37 2c1aa80 3fef120 d5f595e 727f54e d5f595e f8d17f8 727f54e d5f595e 727f54e d5f595e 2c1aa80 a479a6f d5f595e 2c1aa80 d5f595e f15a377 d5f595e a479a6f d5f595e f15a377 d5f595e a479a6f d5f595e f15a377 f8d17f8 d5f595e 2c1aa80 727f54e 2c1aa80 727f54e 2c1aa80 c98b84d 524a697 c98b84d 524a697 2c1aa80 c98b84d f15a377 2c1aa80 f15a377 2c1aa80 f15a377 2c1aa80 f15a377 2c1aa80 b83f54e 727f54e e67bb3b 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e b83f54e 727f54e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
from paddleocr import PaddleOCR
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from scipy.spatial import ConvexHull
from utils.azure_translate import translate_text_azure
from math import dist
import numpy as np
from shapely.geometry import box as shapely_box
from shapely.geometry import Polygon
from shapely.ops import unary_union
import networkx as nx
from shapely.ops import unary_union
from utils.bubble_detect_rtdetr import polygon_to_mask
# Shared OCR engine: constructed once, when this module is imported, and
# reused by every extraction function in this file.
# NOTE(review): lang='ch' presumably selects PaddleOCR's Chinese recognition
# model and use_textline_orientation=True presumably enables text-line
# orientation handling -- confirm against the installed PaddleOCR version.
ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')
def inflate_polygon(polygon_points, percent=0.05):
    """
    Grow a polygon outward by a fraction of its bounding-box diagonal.

    Args:
        polygon_points: Sequence of (x, y) vertex coordinates.
        percent: Buffer distance expressed as a fraction of the diagonal of
            the polygon's axis-aligned bounding box (0.05 = 5%).

    Returns:
        The shapely geometry produced by buffering the polygon by that
        distance.
    """
    shape = Polygon(polygon_points)
    # An invalid outline is swapped for its convex hull before buffering.
    shape = shape if shape.is_valid else shape.convex_hull

    x_lo, y_lo, x_hi, y_hi = shape.bounds
    width = x_hi - x_lo
    height = y_hi - y_lo
    diagonal = (width**2 + height**2) ** 0.5

    return shape.buffer(diagonal * percent)
def group_nearby_boxes(lines, inflation_percent=0.05):
    """
    Group nearby text boxes by checking if their inflated polygons intersect.

    Two boxes belong to the same group when they are linked, directly or
    through a chain of other boxes, by intersecting inflated polygons
    (i.e. groups are the connected components of the intersection graph).

    Args:
        lines: List of (polygon_points, text) tuples.
        inflation_percent: How much to inflate polygons for grouping
            detection (passed to inflate_polygon as ``percent``).

    Returns:
        List of groups, each a dict {"polygons": [...], "texts": [...]}.
        Groups are ordered by their first member's position in ``lines`` and
        the members of every group keep their relative order from ``lines``,
        so joining "texts" follows the input order. Each entry of "polygons"
        is the coordinate list of the shapely exterior ring built from the
        input points (shapely closes the ring, so the first vertex is
        repeated at the end).
    """
    inflated_polys = []
    outlines = []
    texts = []
    for poly_pts, text in lines:
        inflated_polys.append(inflate_polygon(poly_pts, percent=inflation_percent))
        outlines.append(list(Polygon(poly_pts).exterior.coords))
        texts.append(text)

    count = len(texts)

    # Build the connectivity graph: an edge links every pair of boxes whose
    # inflated polygons intersect.
    neighbours = [[] for _ in range(count)]
    for i in range(count):
        for j in range(i + 1, count):
            if inflated_polys[i].intersects(inflated_polys[j]):
                neighbours[i].append(j)
                neighbours[j].append(i)

    # Collect connected components with an explicit stack; a recursive
    # traversal can exceed Python's recursion limit on one large component.
    seen = [False] * count
    grouped = []
    for start in range(count):
        if seen[start]:
            continue
        seen[start] = True
        pending = [start]
        members = []
        while pending:
            node = pending.pop()
            members.append(node)
            for other in neighbours[node]:
                if not seen[other]:
                    seen[other] = True
                    pending.append(other)

        # Traversal order is an artefact of the graph walk; restore input
        # order so that merged text is not scrambled.
        members.sort()
        grouped.append({
            "polygons": [outlines[k] for k in members],
            "texts": [texts[k] for k in members],
        })
    return grouped
def extract_and_translate_chunk(image: Image.Image):
    """
    Extract text from entire image and translate.
    Groups nearby text boxes before translation.

    Args:
        image: PIL image to run OCR on.

    Returns:
        List of dicts, one per translated text group:
            - original: concatenated OCR text of the group
            - translated: translation of that text, or "" when the
              translation call raised
            - polygon: convex hull of the group's boxes as a list of
              (int, int) points
        An empty list is returned when OCR yields nothing usable.
    """
    # Function-scope import keeps this block self-contained.
    from scipy.spatial import QhullError

    np_img = np.array(image)
    results = ocr_model.ocr(np_img)
    if not results or not isinstance(results[0], dict):
        return []

    result_dict = results[0]
    ocr_polys = result_dict.get("rec_polys", [])
    ocr_texts = result_dict.get("rec_texts", [])
    # Use len() rather than truthiness: the OCR output may be a numpy array,
    # whose truth value is ambiguous and raises ValueError.
    if ocr_polys is None or ocr_texts is None:
        return []
    if len(ocr_polys) == 0 or len(ocr_polys) != len(ocr_texts):
        return []

    lines = [
        ([(int(x), int(y)) for x, y in poly], text)
        for poly, text in zip(ocr_polys, ocr_texts)
    ]
    print("π OCR Raw Output:", lines)

    translations = []
    for group in group_nearby_boxes(lines):
        merged_text = "".join(group["texts"]).strip()
        if not merged_text:
            continue

        # Resolve the geometry first so that no translation request is spent
        # on a group that is discarded afterwards.
        all_points = np.array([pt for polygon in group["polygons"] for pt in polygon])
        if len(all_points) < 3:
            continue
        try:
            hull_indices = ConvexHull(all_points).vertices
        except QhullError as exc:
            # Collinear or duplicate points have no 2-D hull; skip the group
            # rather than abort the whole extraction.
            print("Skipping text group with degenerate outline:", exc)
            continue
        hull = [tuple(map(int, all_points[i])) for i in hull_indices]

        try:
            translated = translate_text_azure(merged_text)
        except Exception as e:
            # Best effort: keep the region and report an empty translation.
            print("β οΈ Translation failed:", e)
            translated = ""

        translations.append({
            "original": merged_text,
            "translated": translated,
            "polygon": hull,
        })
    return translations
def extract_and_translate_with_masks(
    full_img,
    interior_polygons,
    grouping_inflation=0.05,
    final_inflation=0
):
    """
    OCR ONLY inside bubble interior polygons, with grouping and inflation.

    For every bubble the image is whitened outside the bubble polygon, OCR is
    run on the result, nearby OCR boxes are grouped, and each group is
    translated and given a (optionally inflated) convex-hull outline.

    Args:
        full_img: PIL Image
        interior_polygons: List of bubble interior polygons [(x,y)]
        grouping_inflation: % used for grouping OCR boxes
        final_inflation: % used to enlarge final rendering polygon

    Returns:
        List of dict:
            - original: concatenated OCR text of the group
            - translated: translated text, or the original text when the
              translation call raised
            - polygon: (inflated) hull as a list of (int, int) points
            - matched_bubble_idx: index of the bubble in interior_polygons
            - num_text_boxes: number of OCR boxes merged into the group
    """
    # Function-scope import keeps this block self-contained.
    from scipy.spatial import QhullError

    np_img = np.array(full_img)
    H, W = np_img.shape[:2]
    translations = []

    for idx, poly in enumerate(interior_polygons):
        # len() rather than truthiness so array-like polygons are accepted.
        if poly is None or len(poly) == 0:
            continue

        # ----------------------------------------------------
        # 1) Mask the bubble region (white outside bubble)
        # ----------------------------------------------------
        mask = polygon_to_mask((W, H), poly)
        inside = mask == 255
        if np_img.ndim == 3:
            # Add a channel axis only for multi-channel images; for a 2-D
            # image the mask already has the right shape.
            inside = inside[..., None]
        bubble_img = np.where(inside, np_img, 255).astype(np.uint8)

        # ----------------------------------------------------
        # 2) OCR inside bubble
        # ----------------------------------------------------
        results = ocr_model.ocr(bubble_img)
        if not results or not isinstance(results[0], dict):
            continue
        res = results[0]
        polys = res.get("rec_polys", [])
        texts = res.get("rec_texts", [])
        # len() rather than truthiness: the OCR output may be a numpy array.
        if polys is None or texts is None:
            continue
        # A length mismatch means boxes and texts cannot be paired reliably;
        # skip the bubble instead of letting zip() truncate silently.
        if len(polys) == 0 or len(polys) != len(texts):
            continue

        # The masked image has the same size as the full image, so OCR
        # coordinates are already in full-image space; only cast to int.
        lines = []
        for poly_coords, text in zip(polys, texts):
            text_stripped = text.strip()
            if not text_stripped:
                continue
            poly_global = [(int(x), int(y)) for x, y in poly_coords]
            lines.append((poly_global, text_stripped))
        if not lines:
            continue
        print(f"π Bubble {idx}: Found {len(lines)} text boxes")

        # ----------------------------------------------------
        # 3) Group nearby OCR text boxes
        # ----------------------------------------------------
        grouped = group_nearby_boxes(lines, inflation_percent=grouping_inflation)
        print(f" β Grouped into {len(grouped)} groups")

        # ----------------------------------------------------
        # 4) Process each group: merge text + hull + inflation
        # ----------------------------------------------------
        for group in grouped:
            group_polys = group["polygons"]
            group_texts = group["texts"]
            merged_text = "".join(group_texts).strip()
            if not merged_text:
                continue

            # Resolve the geometry first so that no translation request is
            # spent on a group that is discarded afterwards.
            all_points = np.array([pt for polygon in group_polys for pt in polygon])
            if len(all_points) < 3:
                continue
            try:
                hull_idx = ConvexHull(all_points).vertices
            except QhullError as exc:
                # Collinear or duplicate points have no 2-D hull; skip the
                # group rather than abort the whole page.
                print(f"Skipping text group with degenerate outline: {exc}")
                continue
            hull_coords = [tuple(map(int, all_points[i])) for i in hull_idx]

            try:
                translated = translate_text_azure(merged_text)
            except Exception as e:
                # Best effort: fall back to the untranslated text.
                print(f"β οΈ Translation failed: {e}")
                translated = merged_text

            # ------------------------------------------------
            # 5) Inflate using EXISTING inflate_polygon()
            # ------------------------------------------------
            inflated_poly = inflate_polygon(hull_coords, percent=final_inflation)
            if inflated_poly.is_empty:
                # E.g. a negative inflation that collapses the shape: keep
                # the un-inflated hull instead of emitting an empty outline.
                final_coords = hull_coords
            else:
                # Anything that is not a single Polygon is reduced to its
                # convex hull so that an exterior ring is available.
                outline = (
                    inflated_poly
                    if inflated_poly.geom_type == "Polygon"
                    else inflated_poly.convex_hull
                )
                # Drop the last coordinate: it repeats the first to close
                # the ring.
                final_coords = [
                    (int(x), int(y)) for x, y in outline.exterior.coords[:-1]
                ]

            translations.append({
                "original": merged_text,
                "translated": translated,
                "polygon": final_coords,
                "matched_bubble_idx": idx,
                "num_text_boxes": len(group_texts),
            })

    print(f"✅ Total translations extracted: {len(translations)}")
    return translations
|