chitrark committed on
Commit
a8abba0
·
verified ·
1 Parent(s): 7066420

Add PaddleOCR text extraction

Browse files
Files changed (1) hide show
  1. app.py +34 -5
app.py CHANGED
@@ -1,14 +1,43 @@
1
  import gradio as gr
 
 
2
 
3
- def placeholder(img):
4
- return "✅ Image received. Next step: OCR + read-aloud."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  demo = gr.Interface(
7
- fn=placeholder,
8
  inputs=gr.Image(type="pil", label="Upload a page photo"),
9
- outputs="text",
 
 
 
10
  title="BookReader × Reachy Mini",
11
- description="Upload a photo of a physical children’s book page. OCR and read-aloud coming next."
12
  )
13
 
14
  demo.launch()
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from paddleocr import PaddleOCR
4
 
5
# Initialize once (important for speed): the detector/recognizer weights are
# loaded at import time, so every Gradio request reuses the same pipeline
# instead of reloading models per call. `use_angle_cls=True` enables the
# text-angle classifier; `lang="en"` selects the English recognition model.
ocr = PaddleOCR(use_angle_cls=True, lang="en")
7
+
8
def run_ocr(img):
    """Run PaddleOCR on an uploaded page photo.

    Parameters
    ----------
    img : PIL.Image.Image or None
        Image from the Gradio input component (``type="pil"``); ``None``
        when nothing was uploaded.

    Returns
    -------
    tuple[str, float]
        Extracted text (one recognized line per ``\\n``-joined entry, in the
        order PaddleOCR returns them) and the mean recognition confidence
        in [0, 1] — 0.0 when no text was found.
    """
    if img is None:
        return "", 0.0

    # Normalize to a 3-channel RGB array: uploads may be RGBA (PNG) or
    # grayscale, which the detector does not expect.
    img_np = np.array(img.convert("RGB"))

    result = ocr.ocr(img_np, cls=True)

    lines = []
    confs = []

    # result format: [ [ [box], (text, conf) ], ... ].
    # BUG FIX: when no text is detected PaddleOCR returns [None] (a None
    # block), which the previous loop crashed on with a TypeError before
    # ever reaching the "(No text detected)" fallback. Skip falsy blocks.
    for block in result or []:
        if not block:
            continue
        for item in block:
            text, conf = item[1]
            lines.append(text)
            confs.append(conf)

    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0

    return extracted if extracted else "(No text detected)", avg_conf
31
 
32
# Wire the OCR function into a simple Gradio UI: one image input, two
# outputs (the recognized text and the average recognition confidence).
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Upload a page photo"),
    outputs=[
        # Mirrors run_ocr's (text, avg_conf) return tuple, in order.
        gr.Textbox(label="Extracted text", lines=12),
        gr.Number(label="Average confidence (0–1)"),
    ],
    title="BookReader × Reachy Mini",
    description="Upload a photo of a physical book page. PaddleOCR runs on CPU and returns extracted text + confidence.",
)

# Start the Gradio server (blocking call; default host/port).
demo.launch()