Add preprocessing for PaddleOCR
Browse files
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import numpy as np
|
|
|
|
| 3 |
from paddleocr import PaddleOCR
|
| 4 |
|
| 5 |
ocr = None # lazy init
|
|
@@ -10,14 +11,38 @@ def get_ocr():
|
|
| 10 |
ocr = PaddleOCR(use_angle_cls=True, lang="en")
|
| 11 |
return ocr
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def run_ocr(img):
|
| 14 |
if img is None:
|
| 15 |
return "(No image)", 0.0
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
ocr_engine = get_ocr()
|
| 20 |
-
result = ocr_engine.ocr(
|
| 21 |
|
| 22 |
lines = []
|
| 23 |
confs = []
|
|
@@ -27,13 +52,13 @@ def run_ocr(img):
|
|
| 27 |
for item in blocks:
|
| 28 |
try:
|
| 29 |
text, conf = item[1]
|
| 30 |
-
lines.append(text)
|
| 31 |
confs.append(float(conf))
|
| 32 |
except Exception:
|
| 33 |
continue
|
| 34 |
|
| 35 |
extracted = "\n".join(lines).strip()
|
| 36 |
-
avg_conf = sum(confs) / len(confs) if confs else 0.0
|
| 37 |
|
| 38 |
return extracted if extracted else "(No text detected)", avg_conf
|
| 39 |
|
|
@@ -45,7 +70,7 @@ demo = gr.Interface(
|
|
| 45 |
gr.Number(label="Average confidence (0–1)")
|
| 46 |
],
|
| 47 |
title="BookReader × Reachy Mini",
|
| 48 |
-
description="CPU-based PaddleOCR for physical book pages.",
|
| 49 |
)
|
| 50 |
|
| 51 |
demo.launch(ssr_mode=False)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
from paddleocr import PaddleOCR
|
| 5 |
|
| 6 |
ocr = None # lazy init
|
|
|
|
| 11 |
ocr = PaddleOCR(use_angle_cls=True, lang="en")
|
| 12 |
return ocr
|
| 13 |
|
| 14 |
+
def preprocess_for_ocr(pil_img, scale=1.6):
    """Turn an input image into an upscaled, contrast-boosted BGR array.

    Args:
        pil_img: A PIL image, or an array-like image. Grayscale (2-D or
            single-channel), RGB and RGBA layouts are accepted.
        scale: Factor applied to both width and height before the
            contrast boost. Defaults to 1.6.

    Returns:
        A 3-channel uint8 ``numpy.ndarray`` in BGR channel order.
    """
    # Let PIL normalise palette / 1-bit / grayscale / alpha modes up front.
    # Anything without ``convert`` is treated as array-like and handled below.
    if hasattr(pil_img, "convert"):
        pil_img = pil_img.convert("RGB")

    # Image -> uint8 array
    rgb = np.array(pil_img)
    if rgb.dtype != np.uint8:
        rgb = rgb.astype(np.uint8)

    # -> BGR (OpenCV format; PaddleOCR tends to behave better).
    # COLOR_RGB2BGR rejects single-channel input, so pick the conversion
    # code from the array layout instead of assuming three channels.
    if rgb.ndim == 2 or rgb.shape[2] == 1:
        bgr = cv2.cvtColor(rgb, cv2.COLOR_GRAY2BGR)
    elif rgb.shape[2] == 4:
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGBA2BGR)
    else:
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Upscale to help small/thin text
    height, width = bgr.shape[:2]
    # Clamp to 1 px so a small ``scale`` never asks cv2.resize for a 0-size image.
    target_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    bgr = cv2.resize(bgr, target_size, interpolation=cv2.INTER_CUBIC)

    # Contrast boost (CLAHE on the L channel only, leaving colour channels untouched)
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    lum, chan_a, chan_b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lum_eq = clahe.apply(lum)
    lab_eq = cv2.merge([lum_eq, chan_a, chan_b])
    bgr = cv2.cvtColor(lab_eq, cv2.COLOR_LAB2BGR)

    return bgr
|
| 37 |
+
|
| 38 |
def run_ocr(img):
|
| 39 |
if img is None:
|
| 40 |
return "(No image)", 0.0
|
| 41 |
|
| 42 |
+
bgr = preprocess_for_ocr(img)
|
| 43 |
|
| 44 |
ocr_engine = get_ocr()
|
| 45 |
+
result = ocr_engine.ocr(bgr)
|
| 46 |
|
| 47 |
lines = []
|
| 48 |
confs = []
|
|
|
|
| 52 |
for item in blocks:
|
| 53 |
try:
|
| 54 |
text, conf = item[1]
|
| 55 |
+
lines.append(str(text))
|
| 56 |
confs.append(float(conf))
|
| 57 |
except Exception:
|
| 58 |
continue
|
| 59 |
|
| 60 |
extracted = "\n".join(lines).strip()
|
| 61 |
+
avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
|
| 62 |
|
| 63 |
return extracted if extracted else "(No text detected)", avg_conf
|
| 64 |
|
|
|
|
| 70 |
gr.Number(label="Average confidence (0–1)")
|
| 71 |
],
|
| 72 |
title="BookReader × Reachy Mini",
|
| 73 |
+
description="CPU-based PaddleOCR for physical book pages (with preprocessing).",
|
| 74 |
)
|
| 75 |
|
| 76 |
demo.launch(ssr_mode=False)
|