Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import cv2 | |
| from paddleocr import PaddleOCR | |
ocr = None  # shared PaddleOCR engine; stays None until get_ocr() is first called


def get_ocr():
    """Return the module-wide PaddleOCR engine, building it on first use.

    The engine is cached in the module-level ``ocr`` variable, so only the
    first call constructs it; later calls hand back the same object.
    """
    global ocr
    if ocr is not None:
        return ocr
    ocr = PaddleOCR(use_angle_cls=True, lang="en")
    return ocr
def preprocess_for_ocr(pil_img):
    """Turn an uploaded image into an enlarged, contrast-boosted BGR array.

    Steps: convert to a uint8 array, swap RGB -> BGR for OpenCV, upscale by
    1.8x with cubic interpolation, then apply CLAHE to the lightness channel
    in LAB space.

    Args:
        pil_img: Anything ``np.array`` can convert; presumably an RGB PIL
            image, since the channels are treated as RGB order.

    Returns:
        The processed image as a BGR array.
    """
    pixels = np.array(pil_img)
    if pixels.dtype != np.uint8:
        pixels = pixels.astype(np.uint8)
    image = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

    # Enlarging first helps with the thin lettering found in kids' books.
    factor = 1.8
    height, width = image.shape[:2]
    target_size = (int(width * factor), int(height * factor))
    image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

    # Boost contrast on lightness only, leaving the two colour channels as-is.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    boosted = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(boosted, cv2.COLOR_LAB2BGR)
def crop_bottom_text_region(bgr):
    """Return the bottom ~35% of the image at full width.

    Text in kids' books usually sits near the bottom of the page, so only the
    rows from 65% of the height downwards are kept.
    """
    height, width = bgr.shape[:2]
    first_row = int(height * 0.65)
    return bgr[first_row:height, 0:width]
def cloud_ocr_stub(_pil_img):
    """Stand-in for the GPU/VLM OCR backend (olmOCR-2 / Nanonets OCR2).

    The image argument is ignored for now; a fixed placeholder message is
    returned. The plan is for this to call an API endpoint later.
    """
    placeholder = "[Cloud OCR placeholder] PaddleOCR confidence was low. Next: call olmOCR-2 / OCR2 via API."
    return placeholder
def _extract_text_and_scores(result):
    """Flatten a PaddleOCR result into parallel ``(texts, scores)`` lists.

    Entries that do not have a recognised shape are skipped instead of
    raising, so one malformed detection cannot fail the whole request. A text
    is only recorded together with its score, keeping both lists aligned.
    """
    texts, scores = [], []
    if not result:
        # None or an empty list: nothing was recognised.
        return texts, scores

    # A batched result nests one list of detections per image; unwrap the
    # first image. Anything else (e.g. a ``None`` page when nothing was
    # detected) is walked as-is and filtered by the shape checks below.
    blocks = result
    if isinstance(result, list) and isinstance(result[0], list):
        blocks = result[0]

    for item in blocks:
        # NOTE(review): newer PaddleOCR releases appear to return dict-like
        # page results carrying parallel "rec_texts"/"rec_scores" lists --
        # confirm against the installed version.
        if isinstance(item, dict) and "rec_texts" in item and "rec_scores" in item:
            pairs = zip(item["rec_texts"], item["rec_scores"])
        else:
            # Classic layout: item == [box, (text, confidence)].
            try:
                pairs = [item[1]]
            except (TypeError, IndexError, KeyError):
                continue

        for pair in pairs:
            try:
                text, score = pair
                score = float(score)
            except (TypeError, ValueError):
                continue
            texts.append(str(text))
            scores.append(score)

    return texts, scores


def run_ocr(img):
    """Run local OCR on an uploaded page and decide whether cloud OCR is needed.

    The image is preprocessed, cropped to its bottom text region and passed to
    the shared PaddleOCR engine. The cloud fallback is flagged when the mean
    confidence is below 0.45 or fewer than 15 characters were extracted.

    Args:
        img: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A ``(text, average_confidence, fallback_flag)`` tuple, where
        ``fallback_flag`` is the string ``"Yes"`` or ``"No"``. When the
        fallback is flagged, the cloud OCR output is appended to the local
        text (or to a "no text" notice if the local pass found nothing).
    """
    if img is None:
        return "(No image)", 0.0, "No"

    bgr = preprocess_for_ocr(img)
    bgr_crop = crop_bottom_text_region(bgr)
    result = get_ocr().ocr(bgr_crop)

    lines, confs = _extract_text_and_scores(result)
    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0

    # ---- fallback decision (simple + effective) ----
    needs_cloud = (avg_conf < 0.45) or (len(extracted) < 15)
    if not needs_cloud:
        return extracted, avg_conf, "No"

    # Always consult the cloud path once the fallback is flagged. Previously
    # `a if a else b + stub()` bound as `a if a else (b + stub())`, so the
    # stub was skipped whenever the local pass returned any text at all.
    local_text = extracted if extracted else "(PaddleOCR found no text)"
    return local_text + "\n\n" + cloud_ocr_stub(img), avg_conf, "Yes"
# Gradio UI wiring: one uploaded page photo in; extracted text, mean
# confidence and the fallback flag out (the three values run_ocr returns).
_page_input = gr.Image(type="pil", label="Upload a page photo")
_result_fields = [
    gr.Textbox(label="Extracted text", lines=12),
    gr.Number(label="Average confidence (0–1)"),
    gr.Textbox(label="Cloud fallback needed?", interactive=False),
]

demo = gr.Interface(
    fn=run_ocr,
    inputs=_page_input,
    outputs=_result_fields,
    title="BookReader × Reachy Mini",
    description="CPU PaddleOCR + smart fallback (VLM OCR stub). Crops bottom text region for kid books.",
)

# Start the app with server-side rendering switched off.
demo.launch(ssr_mode=False)