qqwjq1981 committed on
Commit
f15a377
·
verified ·
1 Parent(s): c98b84d

Update utils/ocr_utils.py

Browse files
Files changed (1) hide show
  1. utils/ocr_utils.py +31 -43
utils/ocr_utils.py CHANGED
@@ -6,72 +6,59 @@ ocr_model = PaddleOCR(use_textline_orientation=True, lang='ch')
6
 
7
  def group_nearby_boxes(lines, max_y_gap=50):
8
  groups = []
9
- used = set()
10
-
11
- def is_valid_polygon(poly):
12
- return isinstance(poly, (list, tuple)) and all(
13
- isinstance(p, (list, tuple)) and len(p) == 2 for p in poly
14
- )
15
 
16
  for i, (poly_i, text_i) in enumerate(lines):
17
- if i in used or not is_valid_polygon(poly_i):
18
  continue
19
 
20
- group = [(poly_i, text_i)]
21
- used.add(i)
 
22
 
23
- xi_min, yi_min = min(pt[1] for pt in poly_i), min(pt[0] for pt in poly_i)
24
- xi_max, yi_max = max(pt[1] for pt in poly_i), max(pt[0] for pt in poly_i)
25
 
26
  for j, (poly_j, text_j) in enumerate(lines):
27
- if j in used or not is_valid_polygon(poly_j):
28
  continue
29
-
30
- xj_min, yj_min = min(pt[1] for pt in poly_j), min(pt[0] for pt in poly_j)
31
- xj_max, yj_max = max(pt[1] for pt in poly_j), max(pt[0] for pt in poly_j)
32
-
33
- if abs(yj_min - yi_min) < max_y_gap or abs(yj_max - yi_max) < max_y_gap:
34
- group.append((poly_j, text_j))
35
- used.add(j)
36
-
37
- groups.append(group)
38
-
 
 
 
39
  return groups
40
 
 
41
  def extract_and_translate_chunk(image: Image.Image):
42
  np_img = np.array(image)
43
  results = ocr_model.ocr(np_img)
44
 
45
- if not results or not isinstance(results[0], dict):
46
- print("⚠️ No OCR results or unexpected format")
47
  return []
48
 
49
- ocr_data = results[0]
50
-
51
- rec_texts = ocr_data.get("rec_texts", [])
52
- rec_polys = ocr_data.get("rec_polys", [])
53
-
54
- if not rec_texts or not rec_polys:
55
- print("⚠️ Missing OCR text or polygons")
56
- return []
57
-
58
- # Step 1: Build list of (polygon, text)
59
- lines = []
60
- for poly, text in zip(rec_polys, rec_texts):
61
- polygon = [(int(x), int(y)) for x, y in poly]
62
- lines.append((polygon, text.strip()))
63
 
64
  print("🔍 OCR Raw Output:", lines)
65
 
66
- # Step 2: Group nearby text boxes
67
  grouped = group_nearby_boxes(lines)
68
-
69
- # Step 3: Translate and compute convex hulls
70
  translations = []
 
71
  for group in grouped:
72
  polygons = group["polygons"]
73
- merged_text = "".join(group["texts"]).strip()
74
 
 
75
  if not merged_text:
76
  continue
77
 
@@ -81,9 +68,10 @@ def extract_and_translate_chunk(image: Image.Image):
81
  print("⚠️ Translation failed:", e)
82
  translated = ""
83
 
 
84
  all_points = np.array([pt for polygon in polygons for pt in polygon])
85
  if len(all_points) < 3:
86
- continue # convex hull needs ≥ 3
87
 
88
  hull_indices = ConvexHull(all_points).vertices
89
  hull = [tuple(map(int, all_points[i])) for i in hull_indices]
 
6
 
7
  def group_nearby_boxes(lines, max_y_gap=50):
8
  groups = []
9
+ used = [False] * len(lines)
 
 
 
 
 
10
 
11
  for i, (poly_i, text_i) in enumerate(lines):
12
+ if used[i]:
13
  continue
14
 
15
+ group_polys = [poly_i]
16
+ group_texts = [text_i]
17
+ used[i] = True
18
 
19
+ xi_center = np.mean([p[0] for p in poly_i])
20
+ yi_center = np.mean([p[1] for p in poly_i])
21
 
22
  for j, (poly_j, text_j) in enumerate(lines):
23
+ if used[j]:
24
  continue
25
+ xj_center = np.mean([p[0] for p in poly_j])
26
+ yj_center = np.mean([p[1] for p in poly_j])
27
+
28
+ # If Y-distance is small, consider them in the same group (horizontal alignment)
29
+ if abs(yj_center - yi_center) < max_y_gap:
30
+ group_polys.append(poly_j)
31
+ group_texts.append(text_j)
32
+ used[j] = True
33
+
34
+ groups.append({
35
+ "polygons": group_polys,
36
+ "texts": group_texts
37
+ })
38
  return groups
39
 
40
+
41
  def extract_and_translate_chunk(image: Image.Image):
42
  np_img = np.array(image)
43
  results = ocr_model.ocr(np_img)
44
 
45
+ # Handle both PaddleOCR and fallback
46
+ if not results or not results[0]:
47
  return []
48
 
49
+ lines = results[0] # [(polygon, text), ...]
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  print("🔍 OCR Raw Output:", lines)
52
 
53
+ # Group nearby lines into combined regions
54
  grouped = group_nearby_boxes(lines)
 
 
55
  translations = []
56
+
57
  for group in grouped:
58
  polygons = group["polygons"]
59
+ texts = group["texts"]
60
 
61
+ merged_text = "".join(texts).strip()
62
  if not merged_text:
63
  continue
64
 
 
68
  print("⚠️ Translation failed:", e)
69
  translated = ""
70
 
71
+ # Compute convex hull from all polygon points
72
  all_points = np.array([pt for polygon in polygons for pt in polygon])
73
  if len(all_points) < 3:
74
+ continue
75
 
76
  hull_indices = ConvexHull(all_points).vertices
77
  hull = [tuple(map(int, all_points[i])) for i in hull_indices]