# Hugging Face Space page residue preserved as a comment so the file is valid Python:
# commit 1ee4116 (verified) by chitrark — "Add bottom text cropping + cloud OCR fallback stub"
import gradio as gr
import numpy as np
import cv2
from paddleocr import PaddleOCR
# Shared PaddleOCR engine; built lazily so app startup stays fast.
ocr = None


def get_ocr():
    """Return the process-wide PaddleOCR instance, constructing it on first use."""
    global ocr
    if ocr is not None:
        return ocr
    ocr = PaddleOCR(use_angle_cls=True, lang="en")
    return ocr
def preprocess_for_ocr(pil_img):
    """Convert a PIL image to an upscaled, contrast-boosted BGR array for OCR.

    Args:
        pil_img: PIL image (any mode — RGBA, palette, grayscale all accepted).

    Returns:
        uint8 BGR numpy array, 1.8x upscaled with CLAHE contrast enhancement
        applied to the lightness channel.
    """
    # BUG FIX: force 3-channel RGB first. Gradio can hand us RGBA (PNG with
    # alpha), palette, or grayscale images, whose numpy arrays are 4-channel
    # or 2-D and make cv2.COLOR_RGB2BGR raise.
    rgb = np.array(pil_img.convert("RGB"))
    if rgb.dtype != np.uint8:
        rgb = rgb.astype(np.uint8)
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Upscale: helps thin kid-book text survive detection/recognition.
    h, w = bgr.shape[:2]
    scale = 1.8
    bgr = cv2.resize(bgr, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_CUBIC)

    # Contrast boost: CLAHE on the L channel only, so colors are preserved.
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    l2 = clahe.apply(l)
    lab2 = cv2.merge([l2, a, b])
    bgr = cv2.cvtColor(lab2, cv2.COLOR_LAB2BGR)
    return bgr
def crop_bottom_text_region(bgr):
    """Kids books often place text at the bottom. Crop bottom ~35%."""
    height = bgr.shape[0]
    top = int(height * 0.65)
    return bgr[top:, :]
def cloud_ocr_stub(_pil_img):
    """Stand-in for a remote GPU/VLM OCR call (olmOCR-2 / Nanonets OCR2).

    Will eventually POST the image to an API endpoint; for now it returns a
    fixed placeholder message.
    """
    message = "[Cloud OCR placeholder] PaddleOCR confidence was low. Next: call olmOCR-2 / OCR2 via API."
    return message
def run_ocr(img):
    """OCR the bottom text region of a page photo and flag low-confidence results.

    Args:
        img: PIL image from the Gradio input, or None when nothing was uploaded.

    Returns:
        Tuple (extracted_text, average_confidence, fallback_needed) where
        fallback_needed is the display string "Yes" or "No".
    """
    if img is None:
        return "(No image)", 0.0, "No"

    bgr = preprocess_for_ocr(img)
    bgr_crop = crop_bottom_text_region(bgr)

    ocr_engine = get_ocr()
    result = ocr_engine.ocr(bgr_crop)

    # PaddleOCR wraps per-image results in an outer list; unwrap when present.
    blocks = result[0] if isinstance(result, list) and result and isinstance(result[0], list) else result

    lines = []
    confs = []
    # BUG FIX: `blocks` is None when PaddleOCR detects no text at all;
    # iterating None would raise TypeError.
    for item in blocks or []:
        try:
            text, conf = item[1]  # each item is [box, (text, confidence)]
            lines.append(str(text))
            confs.append(float(conf))
        except Exception:
            # Skip entries that don't match the expected shape.
            continue

    extracted = "\n".join(lines).strip()
    avg_conf = float(sum(confs) / len(confs)) if confs else 0.0

    # ---- fallback decision (simple + effective) ----
    needs_cloud = (avg_conf < 0.45) or (len(extracted) < 15)
    if needs_cloud:
        # BUG FIX: the original conditional expression parsed as
        # `extracted if extracted else ("(no text)\n\n" + stub)`, so whenever
        # any text was extracted (even at low confidence) the cloud-stub
        # message was silently dropped. Now the stub note is always appended
        # when the fallback triggers.
        header = extracted if extracted else "(PaddleOCR found no text)"
        extracted = header + "\n\n" + cloud_ocr_stub(img)
        return extracted, avg_conf, "Yes"
    return extracted, avg_conf, "No"
# Gradio UI: one page photo in; extracted text, confidence, and fallback flag out.
output_components = [
    gr.Textbox(label="Extracted text", lines=12),
    gr.Number(label="Average confidence (0–1)"),
    gr.Textbox(label="Cloud fallback needed?", interactive=False),
]
demo = gr.Interface(
    fn=run_ocr,
    inputs=gr.Image(type="pil", label="Upload a page photo"),
    outputs=output_components,
    title="BookReader × Reachy Mini",
    description="CPU PaddleOCR + smart fallback (VLM OCR stub). Crops bottom text region for kid books.",
)
demo.launch(ssr_mode=False)