Spaces:

chitrark
/

BookReader-ReachyMini

Paused

App Files Community

chitrark committed on Dec 17, 2025

Commit

c71aa5b

verified ·

1 Parent(s): 330c2b1

Fix OCR result parsing

Browse files

Files changed (1) hide show

app.py +40 -29

app.py CHANGED Viewed

@@ -1,43 +1,54 @@
-import gradio as gr
-import numpy as np
-from paddleocr import PaddleOCR
-# Initialize once (important for speed)
-ocr = PaddleOCR(use_angle_cls=True, lang="en")
 def run_ocr(img):
     if img is None:
         return "", 0.0
-    img_np = np.array(img)  # PIL -> numpy (RGB)
-    # No cls arg here (handled by use_angle_cls=True above)
-    result = ocr.ocr(img_np)
     lines = []
     confs = []
-    # result is typically: [ [ [box], (text, conf) ], ... ]
-    for block in result:
-        for item in block:
-            text, conf = item[1]
-            lines.append(text)
-            confs.append(conf)
     extracted = "\n".join(lines).strip()
     avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
     return extracted if extracted else "(No text detected)", avg_conf
-demo = gr.Interface(
-    fn=run_ocr,
-    inputs=gr.Image(type="pil", label="Upload a page photo"),
-    outputs=[
-        gr.Textbox(label="Extracted text", lines=12),
-        gr.Number(label="Average confidence (0–1)"),
-    ],
-    title="BookReader × Reachy Mini",
-    description="Upload a photo of a physical book page. PaddleOCR runs on CPU and returns extracted text + confidence.",
-)
-demo.launch()

 def run_ocr(img):
     if img is None:
         return "", 0.0
+    img_np = np.array(img)
+    result = ocr.ocr(img_np)  # no cls arg
     lines = []
     confs = []
+    # result can be:
+    #  - [ [ [box, (text, conf)], ... ] ]  (nested)
+    #  - [ [box, (text, conf)], ... ]      (less nested)
+    #  - other variants depending on version
+    blocks = result
+    if isinstance(result, list) and len(result) == 1 and isinstance(result[0], list):
+        # sometimes it's wrapped one extra level
+        blocks = result[0]
+    for item in blocks:
+        if not item:
+            continue
+        # item might be: [box, (text, conf)]  OR  [[box], [text, conf]] etc.
+        # Try common case first
+        try:
+            maybe = item[1]
+            if isinstance(maybe, (list, tuple)) and len(maybe) >= 2:
+                text, conf = maybe[0], float(maybe[1])
+                if text:
+                    lines.append(str(text))
+                    confs.append(conf)
+                continue
+        except Exception:
+            pass
+        # Fallback: search inside item for a (text, conf) pair
+        found = False
+        for part in item:
+            if isinstance(part, (list, tuple)) and len(part) >= 2:
+                # part could be (text, conf) or [text, conf]
+                if isinstance(part[0], str) and isinstance(part[1], (float, int)):
+                    lines.append(part[0])
+                    confs.append(float(part[1]))
+                    found = True
+                    break
+        if found:
+            continue
     extracted = "\n".join(lines).strip()
     avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
     return extracted if extracted else "(No text detected)", avg_conf