# NOTE: removed web-scrape residue (file-size banner, commit hashes, and a
# gutter of line numbers from the hosting UI) that was not part of the program.
import gradio as gr
import numpy as np
import cv2
from paddleocr import PaddleOCR

ocr = None  # module-level singleton; created on first request

def get_ocr():
    """Return the shared PaddleOCR engine, building it lazily on first call."""
    global ocr
    if ocr is not None:
        return ocr
    ocr = PaddleOCR(use_angle_cls=True, lang="en")
    return ocr

def preprocess_for_ocr(pil_img):
    """Convert a PIL image into an OCR-friendly BGR array.

    Upscales 1.8x (thin strokes in children's-book fonts survive better)
    and applies CLAHE on the LAB lightness channel for local contrast.
    """
    arr = np.array(pil_img)
    if arr.dtype != np.uint8:
        arr = arr.astype(np.uint8)

    img = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)

    # Enlarge before contrast work — helps thin kid-book text.
    height, width = img.shape[:2]
    factor = 1.8
    img = cv2.resize(
        img,
        (int(width * factor), int(height * factor)),
        interpolation=cv2.INTER_CUBIC,
    )

    # CLAHE on L only: boosts contrast without shifting color channels.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    recombined = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(recombined, cv2.COLOR_LAB2BGR)

def crop_bottom_text_region(bgr, bottom_fraction=0.35):
    """Return the bottom strip of the image, where kids' books place text.

    Parameters
    ----------
    bgr : np.ndarray
        Image array of shape (H, W) or (H, W, C).
    bottom_fraction : float, optional
        Fraction of the image height to keep, measured from the bottom.
        The default 0.35 reproduces the original hard-coded bottom ~35%
        crop exactly (1.0 - 0.35 == 0.65 in IEEE doubles).

    Returns
    -------
    np.ndarray
        A view of the bottom rows of ``bgr`` (no copy).
    """
    h, w = bgr.shape[:2]
    start_row = int(h * (1.0 - bottom_fraction))
    return bgr[start_row:h, 0:w]

def cloud_ocr_stub(_pil_img):
    """Stand-in for a GPU/VLM OCR backend (olmOCR-2 / Nanonets OCR2).

    The eventual implementation will send the image to an API endpoint;
    for now it returns a fixed placeholder message. The image argument is
    accepted but unused.
    """
    placeholder = (
        "[Cloud OCR placeholder] PaddleOCR confidence was low. "
        "Next: call olmOCR-2 / OCR2 via API."
    )
    return placeholder

def run_ocr(img):
    """OCR the bottom text region of a page photo.

    Parameters
    ----------
    img : PIL.Image.Image | None
        Uploaded page photo from the Gradio widget (None if nothing uploaded).

    Returns
    -------
    tuple[str, float, str]
        (extracted text, average confidence in [0, 1], "Yes"/"No" flag
        indicating whether the cloud-OCR fallback was triggered).
    """
    if img is None:
        return "(No image)", 0.0, "No"

    bgr = preprocess_for_ocr(img)
    bgr_crop = crop_bottom_text_region(bgr)

    ocr_engine = get_ocr()
    result = ocr_engine.ocr(bgr_crop)

    lines = []
    confs = []

    # PaddleOCR returns either [[line, ...]] or [line, ...] depending on
    # version; unwrap one level of nesting when present.
    blocks = result[0] if isinstance(result, list) and result and isinstance(result[0], list) else result
    for item in blocks:
        try:
            text, conf = item[1]
            lines.append(str(text))
            confs.append(float(conf))
        except Exception:
            # Skip malformed entries rather than failing the whole page.
            continue

    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0

    # ---- fallback decision (simple + effective) ----
    needs_cloud = (avg_conf < 0.45) or (len(extracted) < 15)

    if needs_cloud:
        # BUG FIX: the original `a if b else c + d` ternary bound the
        # concatenation to the else-branch only, so the cloud-fallback note
        # was appended ONLY when no text was found at all; low-confidence
        # non-empty results reported "Yes" with no note. Append it whenever
        # the fallback fires.
        if not extracted:
            extracted = "(PaddleOCR found no text)"
        extracted = extracted + "\n\n" + cloud_ocr_stub(img)
        return extracted, avg_conf, "Yes"

    return extracted, avg_conf, "No"


# Gradio UI: one image input mapped to three outputs of run_ocr
# (extracted text, mean confidence, cloud-fallback flag).
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Upload a page photo"),
    outputs=[
        gr.Textbox(label="Extracted text", lines=12),
        gr.Number(label="Average confidence (0–1)"),
        gr.Textbox(label="Cloud fallback needed?", interactive=False),
    ],
    title="BookReader × Reachy Mini",
    description="CPU PaddleOCR + smart fallback (VLM OCR stub). Crops bottom text region for kid books.",
)

# ssr_mode=False disables Gradio's server-side rendering — presumably needed
# on the target hosting platform; TODO confirm it is still required.
demo.launch(ssr_mode=False)