Spaces:
Runtime error
Runtime error
Update utils/ocr_utils.py
Browse files- utils/ocr_utils.py +22 -19
utils/ocr_utils.py
CHANGED
|
@@ -41,29 +41,33 @@ def group_nearby_boxes(lines, max_y_gap=50):
|
|
| 41 |
def extract_and_translate_chunk(image: Image.Image):
|
| 42 |
np_img = np.array(image)
|
| 43 |
results = ocr_model.ocr(np_img)
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
lines = []
|
| 46 |
-
for
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
if isinstance(text_info, (tuple, list)):
|
| 51 |
-
text = text_info[0]
|
| 52 |
-
else:
|
| 53 |
-
text = ""
|
| 54 |
-
|
| 55 |
-
if not text.strip():
|
| 56 |
-
continue
|
| 57 |
-
|
| 58 |
-
if all(isinstance(pt, (list, tuple)) and len(pt) == 2 for pt in box_raw):
|
| 59 |
-
polygon = [(int(x), int(y)) for x, y in box_raw]
|
| 60 |
-
lines.append((polygon, text)) # ✅ This creates the proper format
|
| 61 |
|
| 62 |
print("🔍 OCR Raw Output:", lines)
|
| 63 |
|
|
|
|
| 64 |
grouped = group_nearby_boxes(lines)
|
| 65 |
-
translations = []
|
| 66 |
|
|
|
|
|
|
|
| 67 |
for group in grouped:
|
| 68 |
polygons = group["polygons"]
|
| 69 |
merged_text = "".join(group["texts"]).strip()
|
|
@@ -77,10 +81,9 @@ def extract_and_translate_chunk(image: Image.Image):
|
|
| 77 |
print("⚠️ Translation failed:", e)
|
| 78 |
translated = ""
|
| 79 |
|
| 80 |
-
# Flatten all polygon points for convex hull
|
| 81 |
all_points = np.array([pt for polygon in polygons for pt in polygon])
|
| 82 |
if len(all_points) < 3:
|
| 83 |
-
continue # convex hull needs
|
| 84 |
|
| 85 |
hull_indices = ConvexHull(all_points).vertices
|
| 86 |
hull = [tuple(map(int, all_points[i])) for i in hull_indices]
|
|
|
|
| 41 |
def extract_and_translate_chunk(image: Image.Image):
|
| 42 |
np_img = np.array(image)
|
| 43 |
results = ocr_model.ocr(np_img)
|
| 44 |
+
|
| 45 |
+
if not results or not isinstance(results[0], dict):
|
| 46 |
+
print("⚠️ No OCR results or unexpected format")
|
| 47 |
+
return []
|
| 48 |
+
|
| 49 |
+
ocr_data = results[0]
|
| 50 |
+
|
| 51 |
+
rec_texts = ocr_data.get("rec_texts", [])
|
| 52 |
+
rec_polys = ocr_data.get("rec_polys", [])
|
| 53 |
+
|
| 54 |
+
if not rec_texts or not rec_polys:
|
| 55 |
+
print("⚠️ Missing OCR text or polygons")
|
| 56 |
+
return []
|
| 57 |
+
|
| 58 |
+
# Step 1: Build list of (polygon, text)
|
| 59 |
lines = []
|
| 60 |
+
for poly, text in zip(rec_polys, rec_texts):
|
| 61 |
+
polygon = [(int(x), int(y)) for x, y in poly]
|
| 62 |
+
lines.append((polygon, text.strip()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
print("🔍 OCR Raw Output:", lines)
|
| 65 |
|
| 66 |
+
# Step 2: Group nearby text boxes
|
| 67 |
grouped = group_nearby_boxes(lines)
|
|
|
|
| 68 |
|
| 69 |
+
# Step 3: Translate and compute convex hulls
|
| 70 |
+
translations = []
|
| 71 |
for group in grouped:
|
| 72 |
polygons = group["polygons"]
|
| 73 |
merged_text = "".join(group["texts"]).strip()
|
|
|
|
| 81 |
print("⚠️ Translation failed:", e)
|
| 82 |
translated = ""
|
| 83 |
|
|
|
|
| 84 |
all_points = np.array([pt for polygon in polygons for pt in polygon])
|
| 85 |
if len(all_points) < 3:
|
| 86 |
+
continue # convex hull needs ≥ 3
|
| 87 |
|
| 88 |
hull_indices = ConvexHull(all_points).vertices
|
| 89 |
hull = [tuple(map(int, all_points[i])) for i in hull_indices]
|