chitrark committed on
Commit
c71aa5b
·
verified ·
1 Parent(s): 330c2b1

Fix OCR result parsing

Browse files
Files changed (1) hide show
  1. app.py +40 -29
app.py CHANGED
@@ -1,43 +1,54 @@
1
- import gradio as gr
2
- import numpy as np
3
- from paddleocr import PaddleOCR
4
-
5
- # Initialize once (important for speed)
6
- ocr = PaddleOCR(use_angle_cls=True, lang="en")
7
-
8
  def run_ocr(img):
9
  if img is None:
10
  return "", 0.0
11
 
12
- img_np = np.array(img) # PIL -> numpy (RGB)
13
 
14
- # No cls arg here (handled by use_angle_cls=True above)
15
- result = ocr.ocr(img_np)
16
 
17
  lines = []
18
  confs = []
19
 
20
- # result is typically: [ [ [box], (text, conf) ], ... ]
21
- for block in result:
22
- for item in block:
23
- text, conf = item[1]
24
- lines.append(text)
25
- confs.append(conf)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  extracted = "\n".join(lines).strip()
28
  avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
29
 
30
  return extracted if extracted else "(No text detected)", avg_conf
31
-
32
- demo = gr.Interface(
33
- fn=run_ocr,
34
- inputs=gr.Image(type="pil", label="Upload a page photo"),
35
- outputs=[
36
- gr.Textbox(label="Extracted text", lines=12),
37
- gr.Number(label="Average confidence (0–1)"),
38
- ],
39
- title="BookReader × Reachy Mini",
40
- description="Upload a photo of a physical book page. PaddleOCR runs on CPU and returns extracted text + confidence.",
41
- )
42
-
43
- demo.launch()
 
 
 
 
 
 
 
 
def run_ocr(img):
    """Run OCR on an image and return the text plus its mean confidence.

    Args:
        img: The image to read, or ``None`` when nothing was supplied. It is
            converted with ``np.array`` before being handed to the
            module-level ``ocr`` object.

    Returns:
        A ``(text, avg_conf)`` tuple. ``text`` is the recognised lines joined
        with newlines, or ``"(No text detected)"`` when nothing was
        recognised. ``avg_conf`` is the arithmetic mean of the per-line
        confidences, or ``0.0`` when there are none. For ``img is None`` the
        result is ``("", 0.0)``.
    """
    if img is None:
        return "", 0.0

    img_np = np.array(img)
    result = ocr.ocr(img_np)  # no cls arg

    # The nesting depth of `result` is not stable, so rather than guessing how
    # many wrapper levels to peel off, walk the whole structure and keep every
    # (text, confidence) pair found, in order.
    pairs = list(_iter_ocr_lines(result))
    lines = [text for text, _ in pairs]
    confs = [conf for _, conf in pairs]

    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0

    return extracted if extracted else "(No text detected)", avg_conf


def _as_text_conf(candidate):
    """Return ``(text, conf)`` if *candidate* is a (str, number) pair, else ``None``."""
    if not isinstance(candidate, (list, tuple)) or len(candidate) < 2:
        return None
    text, conf = candidate[0], candidate[1]
    # Requiring a real string is what stops a box corner such as [x, y] from
    # being mistaken for a recognised line.
    if not isinstance(text, str):
        return None
    # Strings and bools would convert with float() but are never confidences.
    if isinstance(conf, (str, bytes, bool)):
        return None
    try:
        return text, float(conf)
    except (TypeError, ValueError):
        return None


def _iter_ocr_lines(node):
    """Yield ``(text, conf)`` for every non-empty recognised line inside *node*."""
    pair = _as_text_conf(node)
    if pair is not None:
        if pair[0]:  # skip empty recognitions so they do not skew the mean
            yield pair
        return

    if isinstance(node, dict):
        # NOTE(review): newer PaddleOCR releases appear to return dict-like
        # results with parallel "rec_texts" / "rec_scores" sequences - confirm
        # against the installed version.
        texts = node.get("rec_texts")
        scores = node.get("rec_scores")
        if texts is not None and scores is not None:
            for candidate in zip(texts, scores):
                pair = _as_text_conf(candidate)
                if pair is not None and pair[0]:
                    yield pair
        return

    # Only lists/tuples are descended into; None, strings, numbers and any
    # other object (e.g. an array holding a box) are ignored.
    if isinstance(node, (list, tuple)):
        for child in node:
            yield from _iter_ocr_lines(child)