import asyncio
import base64
import io
import json
import time

import numpy as np
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse, HTMLResponse
from pdf2image import convert_from_bytes
from PIL import Image
from rapidocr_onnxruntime import RapidOCR

app = FastAPI()

# --- BACKEND LOGIC ---

# Upper bound on the number of PDF pages rendered per upload.
MAX_PDF_PAGES = 5

# JPEG quality used for the page previews returned to the client.
PREVIEW_JPEG_QUALITY = 70

# Image modes Pillow's JPEG writer accepts directly; anything else is
# converted to RGB before encoding.
_JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

# Single OCR engine instance, created at import time and shared by all requests.
engine = RapidOCR()


def _json_default(obj):
    """Serialize NumPy arrays/scalars that ``json`` cannot handle natively.

    Only invoked by ``json.dumps`` for objects it does not know how to
    encode, so plain Python values are unaffected.
    """
    if isinstance(obj, (np.ndarray, np.generic)):
        return obj.tolist()
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )


def _event(payload: dict) -> str:
    """Return *payload* as one newline-terminated JSON line (NDJSON record)."""
    return json.dumps(payload, default=_json_default) + "\n"


def pil_to_base64(img: Image.Image) -> str:
    """Encode a PIL image as a base64 JPEG string.

    Images whose mode the JPEG writer cannot store (e.g. ``RGBA`` or ``P``)
    are converted to ``RGB`` first; previously such images raised ``OSError``
    from ``Image.save``.

    Args:
        img: The image to encode. It is not modified.

    Returns:
        The JPEG bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    if img.mode not in _JPEG_WRITABLE_MODES:
        img = img.convert("RGB")
    buffered = io.BytesIO()
    img.save(buffered, format="JPEG", quality=PREVIEW_JPEG_QUALITY)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _run_ocr(img_np, use_angle_cls: bool):
    """Run the shared OCR engine on one image array and return its result.

    The keyword-rich call is tried first; if the engine rejects the keywords
    with ``TypeError`` the bare call is used instead.
    """
    # NOTE(review): whether ``use_angle_cls`` is actually honored depends on
    # the installed RapidOCR version — confirm the keyword name.
    try:
        result, _ = engine(
            img_np, use_angle_cls=use_angle_cls, use_det=True, use_rec=True
        )
    except TypeError:
        result, _ = engine(img_np)
    return result


def _build_page_event(img: Image.Image, page_number: int, use_angle_cls: bool) -> dict:
    """OCR one page image and assemble its ``"page"`` event payload.

    Args:
        img: Page image to recognize and preview.
        page_number: 1-based page index reported to the client.
        use_angle_cls: Forwarded to the OCR engine call.

    Returns:
        A dict with the page preview, joined text, per-line blocks and the
        elapsed processing time formatted as ``"<seconds>s"``.
    """
    started = time.perf_counter()
    result = _run_ocr(np.array(img), use_angle_cls)

    blocks = []
    full_text_lines = []
    # ``result`` is falsy when nothing was recognized.
    for idx, line in enumerate(result or []):
        # Each line is indexed as (box, text, confidence).
        text = line[1]
        full_text_lines.append(text)
        blocks.append({
            "id": idx,
            "box": line[0],
            "text": text,
            "conf": round(float(line[2]), 4),
        })

    # Key order matters: the preview is encoded before the elapsed time is
    # read, so "time" covers both OCR and JPEG encoding.
    return {
        "type": "page",
        "page_number": page_number,
        "img_base64": pil_to_base64(img),
        "full_text": "\n".join(full_text_lines),
        "blocks": blocks,
        "time": f"{time.perf_counter() - started:.2f}s",
    }


@app.get("/", response_class=HTMLResponse)
def home():
    """Serve the single-page frontend."""
    return html_content


@app.post("/predict")
async def predict(file: UploadFile = File(...), use_angle_cls: bool = Form(False)):
    """Run OCR on an uploaded image or PDF and stream results as NDJSON.

    The response is a sequence of JSON lines with a ``"type"`` field:
    ``status`` (progress message), ``meta`` (``total_pages``), one ``page``
    per processed page, then ``complete``. On failure an ``error`` event is
    emitted and the stream ends without ``complete``.

    Args:
        file: Uploaded image or PDF. PDFs are limited to the first
            ``MAX_PDF_PAGES`` pages.
        use_angle_cls: Form flag forwarded to the OCR engine call.

    Returns:
        A ``StreamingResponse`` with media type ``application/x-ndjson``.
    """
    file_bytes = await file.read()
    # The client-supplied content type may be missing or generic, so also
    # recognize PDFs by their magic bytes.
    is_pdf = (
        file.content_type == "application/pdf"
        or file_bytes.startswith(b"%PDF")
    )

    async def process_stream():
        # NOTE: PDF rendering and OCR below are synchronous calls made from
        # this async generator, so they occupy the event loop while running.
        try:
            if is_pdf:
                yield _event({"type": "status", "message": "Converting PDF..."})
                images = convert_from_bytes(file_bytes, last_page=MAX_PDF_PAGES)
            else:
                yield _event({"type": "status", "message": "Reading Image..."})
                images = [Image.open(io.BytesIO(file_bytes)).convert("RGB")]
        except Exception as e:
            yield _event({"type": "error", "message": str(e)})
            return

        yield _event({"type": "meta", "total_pages": len(images)})

        for page_number, img in enumerate(images, start=1):
            try:
                page_data = _build_page_event(img, page_number, use_angle_cls)
                line = _event(page_data)
            except Exception as e:
                # Report the failure to the client instead of silently
                # truncating the stream mid-document.
                yield _event({"type": "error", "message": str(e)})
                return
            yield line
            # Brief pause so the event loop can flush the page to the client.
            await asyncio.sleep(0.01)

        yield _event({"type": "complete"})

    return StreamingResponse(process_stream(), media_type="application/x-ndjson")


# --- FRONTEND ---

# NOTE(review): the template below contains only plain text (no HTML tags or
# script); confirm the full markup is present in the deployed file.
html_content = """ OCR Studio
OCR STUDIO PRO
READY
DATA
PROCESSING
"""