receipt-ocr / main.py
sinful1's picture
fix(logging): use print(flush=True) for TIMING lines so they appear in HF logs
3a00f27
"""
Receipt OCR Service — FastAPI entry point.
Endpoints:
    POST /ocr     Upload a receipt image, get structured JSON back.
    GET  /health  Liveness check.
    GET  /warmup  Run OCR on a blank image to exercise the inference engine (keepalive).

Usage:
    uvicorn main:app --host 0.0.0.0 --port 8000 --reload
"""
import asyncio
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI, File, HTTPException, Query, UploadFile
from fastapi.responses import JSONResponse
from ocr.reader import PaddleOCRReader
from ocr.parser import parse_blocks
from utils.image_prep import preprocess_image
# Configure the root logger so INFO-level records from this module are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global reader instance — loaded once at startup by `lifespan` and reset to
# None on shutdown. Request handlers treat None as "model not loaded yet".
_reader: PaddleOCRReader | None = None
async def _background_warmup() -> None:
    """Run one OCR pass on a blank white image in a worker thread.

    The inference call is handed to the event loop's default executor so the
    loop stays free to serve requests while it runs. Warm-up is best-effort:
    any failure is logged as a warning and otherwise ignored.
    """
    import numpy as np

    # Bind the reader now instead of looking up the module global from inside
    # the worker thread; a later reset of ``_reader`` to None (shutdown) must
    # not turn the call into an attribute lookup on None.
    reader = _reader
    if reader is None:
        logger.warning("Background warm-up skipped: OCR model not loaded")
        return

    logger.info("Background warm-up starting...")
    # 200x600x3 uint8 array filled with 255 (a blank white image).
    blank = np.ones((200, 600, 3), dtype=np.uint8) * 255
    try:
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine.
        await asyncio.get_running_loop().run_in_executor(None, reader.extract, blank)
        logger.info("Background warm-up complete.")
    except Exception as exc:
        logger.warning("Background warm-up failed (non-fatal): %s", exc)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Own the OCR reader for the lifetime of the application.

    Startup: construct the reader and schedule a background warm-up.
    Shutdown: stop waiting on a still-pending warm-up and release the reader.
    """
    global _reader
    _reader = PaddleOCRReader()
    # Run warm-up in background so the server starts accepting connections
    # immediately. First real request after restart may still be slow (JIT),
    # but the server won't appear unresponsive during startup.
    #
    # Keep a reference to the task: the event loop only holds weak references
    # to tasks, so an unreferenced one may be garbage-collected mid-execution.
    warmup_task = asyncio.create_task(_background_warmup())
    try:
        yield
    finally:
        # The finally clause also runs when shutdown is caused by an exception
        # thrown into the generator, so the reader is always released.
        if not warmup_task.done():
            warmup_task.cancel()
            # Collect the cancellation as a result rather than raising it here.
            # An executor call that is already running is not interrupted; the
            # task merely stops waiting for it.
            await asyncio.gather(warmup_task, return_exceptions=True)
        _reader = None
# Application object served by uvicorn (`main:app`); start-up and shutdown
# work is delegated to `lifespan`.
app = FastAPI(
    title="Receipt OCR API",
    description="Extract structured data from receipt images using PaddleOCR.",
    version="1.0.0",
    lifespan=lifespan,
)
@app.get("/health")
async def health():
    """Liveness probe: always reports "ok" plus whether the OCR reader exists."""
    model_loaded = _reader is not None
    return {"status": "ok", "model_loaded": model_loaded}
@app.get("/warmup")
async def warmup():
    """Run OCR on a blank image to exercise the inference engine. Used by keepalive cron.

    Returns ``{"status": "warm"}`` once the OCR pass has finished.

    Raises:
        HTTPException: 503 if the OCR reader has not been created yet.
    """
    # Snapshot the global so the None check and the call use the same object.
    reader = _reader
    if reader is None:
        raise HTTPException(status_code=503, detail="OCR model not loaded yet")
    import numpy as np

    # 200x600x3 uint8 array filled with 255 (a blank white image).
    blank = np.ones((200, 600, 3), dtype=np.uint8) * 255
    # Inference is a blocking call; running it directly inside this coroutine
    # would stall the event loop (and every other request, /health included)
    # until it returns. Offload it to the default executor instead.
    # NOTE(review): assumes reader.extract may be called from a worker thread,
    # as _background_warmup already does — confirm for the OCR backend.
    await asyncio.get_running_loop().run_in_executor(None, reader.extract, blank)
    return {"status": "warm"}
@app.post("/ocr")
async def ocr_receipt(
    file: UploadFile = File(..., description="Receipt image (JPEG, PNG, etc.)"),
    debug: bool = Query(False, description="Include raw OCR blocks in response"),
):
    """
    Process a receipt image and return structured JSON.

    - **file**: multipart image upload
    - **debug**: if true, includes raw OCR text blocks in the response

    Error responses:

    - **503**: OCR model not loaded yet
    - **415**: upload declares a non-image content type
    - **400**: empty upload, or image preprocessing failed
    - **500**: OCR extraction failed
    """
    # Snapshot the global so the None check and the later call use one object.
    reader = _reader
    if reader is None:
        raise HTTPException(status_code=503, detail="OCR model not loaded yet")

    # Validate content type loosely: a missing content type is let through.
    content_type = file.content_type or ""
    if content_type and not content_type.startswith("image/"):
        raise HTTPException(
            status_code=415,
            detail=f"Unsupported media type: {content_type}. Upload an image file.",
        )

    import time

    # Preprocessing and inference are blocking calls; running them directly in
    # this coroutine would stall the event loop for every other request. They
    # are offloaded to the default executor instead.
    # NOTE(review): assumes reader.extract may be called from a worker thread,
    # as _background_warmup already does — confirm for the OCR backend.
    loop = asyncio.get_running_loop()

    t0 = time.perf_counter()
    raw_bytes = await file.read()
    if not raw_bytes:
        raise HTTPException(status_code=400, detail="Empty file uploaded")

    try:
        image = await loop.run_in_executor(None, preprocess_image, raw_bytes)
    except (ValueError, FileNotFoundError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Image preprocessing failed: {exc}"
        ) from exc
    t1 = time.perf_counter()
    # TIMING lines use print(flush=True) on purpose so they show up in the
    # hosting platform's logs immediately.
    print(f"TIMING preprocess: {t1-t0:.2f}s | image size: {image.shape[1]}x{image.shape[0]} | upload: {len(raw_bytes)//1024}KB", flush=True)

    try:
        blocks = await loop.run_in_executor(None, reader.extract, image)
    except Exception as exc:
        logger.exception("OCR extraction failed")
        raise HTTPException(status_code=500, detail=f"OCR failed: {exc}") from exc
    t2 = time.perf_counter()
    print(f"TIMING ocr extract: {t2-t1:.2f}s | blocks found: {len(blocks)}", flush=True)

    result = parse_blocks(blocks)
    t3 = time.perf_counter()
    print(f"TIMING parse: {t3-t2:.2f}s | items: {len(result.get('line_items', []))} | total: {t3-t0:.2f}s", flush=True)

    if debug:
        result["_raw_blocks"] = blocks
    return JSONResponse(content=result)