# NOTE(review): the three lines below are viewer/extraction artifacts (file size,
# git blame hashes, column ruler) — not part of the program. Commented out so the
# file parses; safe to delete entirely.
# File size: 2,734 Bytes
# 6fefcc0 46ffc07 6fefcc0 46ffc07 1ee4116 46ffc07 1ee4116 46ffc07 1ee4116 46ffc07 1ee4116 a8abba0 1ee4116 a8abba0 46ffc07 1ee4116 a8abba0 6fefcc0 1ee4116 a8abba0 6fefcc0 c71aa5b 6fefcc0 46ffc07 6fefcc0 c71aa5b a8abba0 46ffc07 a8abba0 1ee4116 6fefcc0 1ee4116 6fefcc0 1ee4116 6fefcc0 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import gradio as gr
import numpy as np
import cv2
from paddleocr import PaddleOCR
# Module-level PaddleOCR engine; created lazily by get_ocr() on first request
# because construction is expensive (model load).
ocr = None # lazy init
def get_ocr():
    """Return the shared PaddleOCR engine, constructing it on first use.

    The engine is memoized in the module-level ``ocr`` global so the
    (slow) model load happens at most once per process.
    """
    global ocr
    if ocr is not None:
        return ocr
    ocr = PaddleOCR(use_angle_cls=True, lang="en")
    return ocr
def preprocess_for_ocr(pil_img):
    """Convert a PIL image into an OCR-friendly BGR array.

    Steps: RGB→BGR, 1.8× cubic upscale (helps thin kid-book fonts),
    then CLAHE contrast boost applied to the lightness channel in LAB space.
    """
    arr = np.array(pil_img)
    if arr.dtype != np.uint8:
        arr = arr.astype(np.uint8)
    img = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
    # Upscale: thin strokes survive the OCR engine's internal binarization better.
    height, width = img.shape[:2]
    factor = 1.8
    new_size = (int(width * factor), int(height * factor))
    img = cv2.resize(img, new_size, interpolation=cv2.INTER_CUBIC)
    # Contrast boost: equalize only lightness so colors are not distorted.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    boosted = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(boosted, cv2.COLOR_LAB2BGR)
def crop_bottom_text_region(bgr):
    """Return the bottom ~35% of the image.

    Kids' books usually place their text near the bottom of the page,
    so restricting OCR to that band cuts noise from illustrations.
    """
    height = bgr.shape[0]
    start_row = int(height * 0.65)
    return bgr[start_row:, :]
def cloud_ocr_stub(_pil_img):
    """
    Placeholder for GPU/VLM OCR (olmOCR-2 / Nanonets OCR2).
    Later this will call an API endpoint; for now it returns a fixed notice.
    The image argument is accepted (for interface compatibility) but unused.
    """
    notice = (
        "[Cloud OCR placeholder] PaddleOCR confidence was low. "
        "Next: call olmOCR-2 / OCR2 via API."
    )
    return notice
def run_ocr(img):
    """Run local PaddleOCR on an uploaded page photo.

    Returns a 3-tuple for the Gradio outputs:
      (extracted text, average confidence in [0, 1], "Yes"/"No" cloud-fallback flag).
    """
    if img is None:
        return "(No image)", 0.0, "No"
    bgr = preprocess_for_ocr(img)
    bgr_crop = crop_bottom_text_region(bgr)
    ocr_engine = get_ocr()
    result = ocr_engine.ocr(bgr_crop)
    lines = []
    confs = []
    # PaddleOCR result shape varies by version: sometimes a per-page wrapper list,
    # sometimes the block list directly. Unwrap one level if needed.
    blocks = result[0] if isinstance(result, list) and result and isinstance(result[0], list) else result
    for item in blocks:
        try:
            text, conf = item[1]
            lines.append(str(text))
            confs.append(float(conf))
        except Exception:
            # Best-effort parsing: skip entries that don't match (box, (text, conf)).
            continue
    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0
    # ---- fallback decision (simple + effective) ----
    needs_cloud = (avg_conf < 0.45) or (len(extracted) < 15)
    if needs_cloud:
        # BUG FIX: the original used a conditional expression whose precedence put
        # the "\n\n" + cloud_ocr_stub(img) concatenation entirely inside the
        # else branch, so the cloud stub was only ever shown when NO text was
        # found — never in the low-confidence-with-text case, despite the flag
        # reporting "Yes". Append the stub in both cases.
        base = extracted if extracted else "(PaddleOCR found no text)"
        extracted = base + "\n\n" + cloud_ocr_stub(img)
        return extracted, avg_conf, "Yes"
    return extracted, avg_conf, "No"
# Gradio UI wiring: one image input, three outputs matching run_ocr's
# (text, avg confidence, fallback flag) return tuple.
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Upload a page photo"),
    outputs=[
        gr.Textbox(label="Extracted text", lines=12),
        gr.Number(label="Average confidence (0–1)"),
        gr.Textbox(label="Cloud fallback needed?", interactive=False),
    ],
    title="BookReader × Reachy Mini",
    description="CPU PaddleOCR + smart fallback (VLM OCR stub). Crops bottom text region for kid books.",
)
# ssr_mode=False disables Gradio's server-side rendering — presumably for
# hosting-environment compatibility (e.g. HF Spaces); confirm before changing.
demo.launch(ssr_mode=False)
# (viewer artifact — trailing ruler character removed from code path)