""" Hosted document parser API for Fresh Catch Inventory. Deploy to Hugging Face Spaces (Docker), Fly.io, or any VM with Python + Tesseract. """ from __future__ import annotations import os import subprocess import sys import tempfile from pathlib import Path from fastapi import FastAPI, File, Header, HTTPException, Query, UploadFile from fastapi.middleware.cors import CORSMiddleware REPO_ROOT = Path(__file__).resolve().parents[2] PARSE_SCRIPT = REPO_ROOT / "scripts" / "parse_vendor_document.py" SERVICE_SECRET = os.environ.get("DOCUMENT_PARSER_SERVICE_SECRET", "").strip() app = FastAPI(title="Fresh Catch Document Parser", version="1.0.0") app.add_middleware( CORSMiddleware, allow_origins=os.environ.get("DOCUMENT_PARSER_CORS_ORIGINS", "*").split(","), allow_credentials=True, allow_methods=["POST", "GET"], allow_headers=["*"], ) def verify_auth(authorization: str | None) -> None: if not SERVICE_SECRET: return if not authorization or authorization != f"Bearer {SERVICE_SECRET}": raise HTTPException(status_code=401, detail="Unauthorized") @app.get("/health") def health() -> dict[str, str]: return {"status": "ok"} @app.post("/parse") async def parse_document( file: UploadFile = File(...), type: str = Query("auto", pattern="^(auto|invoice|receipt)$"), authorization: str | None = Header(default=None), ) -> dict: verify_auth(authorization) if not PARSE_SCRIPT.exists(): raise HTTPException(status_code=500, detail="parse_vendor_document.py not found") contents = await file.read() if not contents: raise HTTPException(status_code=400, detail="Empty file") suffix = Path(file.filename or "upload.png").suffix or ".png" with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp: tmp.write(contents) image_path = tmp.name try: completed = subprocess.run( [sys.executable, str(PARSE_SCRIPT), "--image", image_path, "--type", type], capture_output=True, text=True, timeout=int(os.environ.get("DOCUMENT_PARSER_TIMEOUT_MS", "120000")) // 1000, cwd=str(REPO_ROOT), ) finally: Path(image_path).unlink(missing_ok=True) if completed.returncode != 0: detail = (completed.stderr or completed.stdout or "Parse failed").strip() raise HTTPException(status_code=500, detail=detail[:2000]) import json try: return json.loads(completed.stdout) except json.JSONDecodeError as error: raise HTTPException(status_code=500, detail=f"Invalid parser output: {error}") from error