chitrark committed on
Commit
46ffc07
·
verified ·
1 Parent(s): 876834c

Add preprocessing for PaddleOCR

Browse files
Files changed (1) hide show
  1. app.py +30 -5
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import numpy as np
 
3
  from paddleocr import PaddleOCR
4
 
5
  ocr = None # lazy init
@@ -10,14 +11,38 @@ def get_ocr():
10
  ocr = PaddleOCR(use_angle_cls=True, lang="en")
11
  return ocr
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def run_ocr(img):
14
  if img is None:
15
  return "(No image)", 0.0
16
 
17
- img_np = np.array(img)
18
 
19
  ocr_engine = get_ocr()
20
- result = ocr_engine.ocr(img_np)
21
 
22
  lines = []
23
  confs = []
@@ -27,13 +52,13 @@ def run_ocr(img):
27
  for item in blocks:
28
  try:
29
  text, conf = item[1]
30
- lines.append(text)
31
  confs.append(float(conf))
32
  except Exception:
33
  continue
34
 
35
  extracted = "\n".join(lines).strip()
36
- avg_conf = sum(confs) / len(confs) if confs else 0.0
37
 
38
  return extracted if extracted else "(No text detected)", avg_conf
39
 
@@ -45,7 +70,7 @@ demo = gr.Interface(
45
  gr.Number(label="Average confidence (0–1)")
46
  ],
47
  title="BookReader × Reachy Mini",
48
- description="CPU-based PaddleOCR for physical book pages.",
49
  )
50
 
51
  demo.launch(ssr_mode=False)
 
1
  import gradio as gr
2
  import numpy as np
3
+ import cv2
4
  from paddleocr import PaddleOCR
5
 
6
  ocr = None # lazy init
 
11
  ocr = PaddleOCR(use_angle_cls=True, lang="en")
12
  return ocr
13
 
14
def preprocess_for_ocr(pil_img, scale=1.6):
    """Return an upscaled, contrast-enhanced BGR uint8 array for OCR.

    Steps: normalise the input to a 3-channel image, convert it to BGR
    channel order, enlarge it by ``scale`` with bicubic interpolation, then
    apply CLAHE to the L channel in LAB space.

    Args:
        pil_img: A PIL image or an array-like image. PIL images of any mode
            (grayscale, palette, RGBA, ...) are converted to RGB first.
            Arrays may be HxW, HxWx1, HxWx3 (assumed RGB) or HxWx4
            (assumed RGBA).
        scale: Resize factor applied to both width and height.

    Returns:
        A ``numpy.ndarray`` of dtype uint8 with three channels in BGR order.
    """
    # Let PIL resolve palette / grayscale / alpha / CMYK modes to plain RGB;
    # np.array() on those modes would yield index maps or odd channel counts
    # that cv2.cvtColor(..., COLOR_RGB2BGR) rejects or misreads.
    if hasattr(pil_img, "convert"):
        pil_img = pil_img.convert("RGB")

    rgb = np.asarray(pil_img)
    if rgb.dtype != np.uint8:
        rgb = rgb.astype(np.uint8)

    # Pick the conversion matching the actual channel layout so array inputs
    # that are not PIL images (no .convert) still end up as 3-channel BGR.
    if rgb.ndim == 2 or (rgb.ndim == 3 and rgb.shape[2] == 1):
        bgr = cv2.cvtColor(rgb, cv2.COLOR_GRAY2BGR)
    elif rgb.ndim == 3 and rgb.shape[2] == 4:
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGBA2BGR)
    else:
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Upscale to help small/thin text. Clamp to >= 1 px so a tiny input or a
    # small scale can never request a zero-sized output from cv2.resize.
    h, w = bgr.shape[:2]
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    bgr = cv2.resize(bgr, new_size, interpolation=cv2.INTER_CUBIC)

    # Contrast boost: CLAHE on the lightness channel only, leaving the two
    # colour channels untouched.
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    l2 = clahe.apply(l)
    lab2 = cv2.merge([l2, a, b])
    bgr = cv2.cvtColor(lab2, cv2.COLOR_LAB2BGR)

    return bgr
37
+
38
  def run_ocr(img):
39
  if img is None:
40
  return "(No image)", 0.0
41
 
42
+ bgr = preprocess_for_ocr(img)
43
 
44
  ocr_engine = get_ocr()
45
+ result = ocr_engine.ocr(bgr)
46
 
47
  lines = []
48
  confs = []
 
52
  for item in blocks:
53
  try:
54
  text, conf = item[1]
55
+ lines.append(str(text))
56
  confs.append(float(conf))
57
  except Exception:
58
  continue
59
 
60
  extracted = "\n".join(lines).strip()
61
+ avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
62
 
63
  return extracted if extracted else "(No text detected)", avg_conf
64
 
 
70
  gr.Number(label="Average confidence (0–1)")
71
  ],
72
  title="BookReader × Reachy Mini",
73
+ description="CPU-based PaddleOCR for physical book pages (with preprocessing).",
74
  )
75
 
76
  demo.launch(ssr_mode=False)