chitrark committed on
Commit
a8abba0
·
verified ·
1 Parent(s): 7066420

Add PaddleOCR text extraction

Browse files
Files changed (1) hide show
  1. app.py +34 -5
app.py CHANGED
@@ -1,14 +1,43 @@
1
  import gradio as gr
 
 
2
 
3
- def placeholder(img):
4
- return "✅ Image received. Next step: OCR + read-aloud."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  demo = gr.Interface(
7
- fn=placeholder,
8
  inputs=gr.Image(type="pil", label="Upload a page photo"),
9
- outputs="text",
 
 
 
10
  title="BookReader × Reachy Mini",
11
- description="Upload a photo of a physical children’s book page. OCR and read-aloud coming next."
12
  )
13
 
14
  demo.launch()
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from paddleocr import PaddleOCR
4
 
5
# Initialize once (important for speed): the detector/recognizer weights are
# loaded at import time, so every Gradio request reuses the same pipeline
# instead of reloading models per call. `use_angle_cls=True` enables the
# text-angle classifier; `lang="en"` selects the English recognition model.
ocr = PaddleOCR(use_angle_cls=True, lang="en")
7
+
8
def run_ocr(img):
    """Run PaddleOCR on an uploaded page photo.

    Parameters
    ----------
    img : PIL.Image.Image or None
        Image from the Gradio input component (``type="pil"``); ``None``
        when nothing was uploaded.

    Returns
    -------
    tuple[str, float]
        Extracted text (one recognized line per ``\\n``-joined entry, in the
        order PaddleOCR returns them) and the mean recognition confidence
        in [0, 1] — 0.0 when no text was found.
    """
    if img is None:
        return "", 0.0

    # Normalize to a 3-channel RGB array: uploads may be RGBA (PNG) or
    # grayscale, which the detector does not expect.
    img_np = np.array(img.convert("RGB"))

    result = ocr.ocr(img_np, cls=True)

    lines = []
    confs = []

    # result format: [ [ [box], (text, conf) ], ... ].
    # BUG FIX: when no text is detected PaddleOCR returns [None] (a None
    # block), which the previous loop crashed on with a TypeError before
    # ever reaching the "(No text detected)" fallback. Skip falsy blocks.
    for block in result or []:
        if not block:
            continue
        for item in block:
            text, conf = item[1]
            lines.append(text)
            confs.append(conf)

    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0

    return extracted if extracted else "(No text detected)", avg_conf
31
 
32
# Wire the OCR function into a simple Gradio UI: one image input, two
# outputs (the recognized text and the average recognition confidence).
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Upload a page photo"),
    outputs=[
        # Mirrors run_ocr's (text, avg_conf) return tuple, in order.
        gr.Textbox(label="Extracted text", lines=12),
        gr.Number(label="Average confidence (0–1)"),
    ],
    title="BookReader × Reachy Mini",
    description="Upload a photo of a physical book page. PaddleOCR runs on CPU and returns extracted text + confidence.",
)

# Start the Gradio server (blocking call; default host/port).
demo.launch()