Spaces:

chitrark
/

BookReader-ReachyMini

Paused

App Files Community

chitrark committed on Dec 17, 2025

Commit

46ffc07

verified ·

1 Parent(s): 876834c

Add preprocessing for PaddleOCR

Browse files

Files changed (1) hide show

app.py +30 -5

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import numpy as np
 from paddleocr import PaddleOCR
 ocr = None  # lazy init
@@ -10,14 +11,38 @@ def get_ocr():
         ocr = PaddleOCR(use_angle_cls=True, lang="en")
     return ocr
 def run_ocr(img):
     if img is None:
         return "(No image)", 0.0
-    img_np = np.array(img)
     ocr_engine = get_ocr()
-    result = ocr_engine.ocr(img_np)
     lines = []
     confs = []
@@ -27,13 +52,13 @@ def run_ocr(img):
     for item in blocks:
         try:
             text, conf = item[1]
-            lines.append(text)
             confs.append(float(conf))
         except Exception:
             continue
     extracted = "\n".join(lines).strip()
-    avg_conf = sum(confs) / len(confs) if confs else 0.0
     return extracted if extracted else "(No text detected)", avg_conf
@@ -45,7 +70,7 @@ demo = gr.Interface(
         gr.Number(label="Average confidence (0–1)")
     ],
     title="BookReader × Reachy Mini",
-    description="CPU-based PaddleOCR for physical book pages.",
 )
 demo.launch(ssr_mode=False)

 import gradio as gr
 import numpy as np
+import cv2
 from paddleocr import PaddleOCR
 ocr = None  # lazy init
         ocr = PaddleOCR(use_angle_cls=True, lang="en")
     return ocr
def preprocess_for_ocr(pil_img, scale=1.6, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Prepare an image for OCR: convert to BGR, upscale, and boost contrast.

    Args:
        pil_img: Input image. Anything ``np.array`` can turn into a 2-D
            (grayscale) array or a 3-D array with 1, 3 (RGB) or 4 (RGBA)
            channels in the last axis.
        scale: Factor applied to both width and height before contrast
            enhancement. Defaults to the original hard-coded 1.6.
        clip_limit: ``clipLimit`` passed to ``cv2.createCLAHE``.
        tile_grid_size: ``tileGridSize`` passed to ``cv2.createCLAHE``.

    Returns:
        A 3-channel uint8 BGR ``numpy.ndarray`` that has been resized by
        ``scale`` and had CLAHE applied to its LAB lightness channel.

    Raises:
        ValueError: If the array is not 2-D or 3-D with 1, 3 or 4 channels.
    """
    pixels = np.array(pil_img)
    if pixels.dtype != np.uint8:
        # NOTE(review): a plain cast, exactly as before; float images in
        # [0, 1] or 16-bit images are not rescaled here -- confirm callers
        # only pass 8-bit data.
        pixels = pixels.astype(np.uint8)

    # Pick the conversion that matches the actual channel layout. The RGB
    # conversion alone does not accept single-channel (grayscale) input.
    if pixels.ndim == 2:
        channels = 1
    elif pixels.ndim == 3:
        channels = pixels.shape[2]
    else:
        channels = None

    if channels == 1:
        bgr = cv2.cvtColor(pixels, cv2.COLOR_GRAY2BGR)
    elif channels == 3:
        # RGB -> BGR (OpenCV's channel order, which is what gets handed to OCR)
        bgr = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
    elif channels == 4:
        # Drop the alpha channel explicitly rather than relying on implicit handling.
        bgr = cv2.cvtColor(pixels, cv2.COLOR_RGBA2BGR)
    else:
        raise ValueError(
            f"Unsupported image shape for OCR preprocessing: {pixels.shape}"
        )

    # Upscale to help small/thin text. Clamp to 1 px so a tiny input or a
    # small scale can never request a zero-sized output.
    height, width = bgr.shape[:2]
    target_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    bgr = cv2.resize(bgr, target_size, interpolation=cv2.INTER_CUBIC)

    # Contrast boost: CLAHE on the lightness channel only, leaving the two
    # colour channels of the LAB representation untouched.
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    enhanced = cv2.merge([clahe.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)
 def run_ocr(img):
     if img is None:
         return "(No image)", 0.0
+    bgr = preprocess_for_ocr(img)
     ocr_engine = get_ocr()
+    result = ocr_engine.ocr(bgr)
     lines = []
     confs = []
     for item in blocks:
         try:
             text, conf = item[1]
+            lines.append(str(text))
             confs.append(float(conf))
         except Exception:
             continue
     extracted = "\n".join(lines).strip()
+    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
     return extracted if extracted else "(No text detected)", avg_conf
         gr.Number(label="Average confidence (0–1)")
     ],
     title="BookReader × Reachy Mini",
+    description="CPU-based PaddleOCR for physical book pages (with preprocessing).",
 )
 demo.launch(ssr_mode=False)