Spaces:
Sleeping
Sleeping
| """ | |
| Hosted document parser API for Fresh Catch Inventory. | |
| Deploy to Hugging Face Spaces (Docker), Fly.io, or any VM with Python + Tesseract. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import subprocess | |
| import sys | |
| import tempfile | |
| from pathlib import Path | |
| from fastapi import FastAPI, File, Header, HTTPException, Query, UploadFile | |
| from fastapi.middleware.cors import CORSMiddleware | |
# Repository root: two directory levels above the directory containing this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Parser script that does the actual document processing; run as a subprocess.
PARSE_SCRIPT = REPO_ROOT / "scripts" / "parse_vendor_document.py"
# Shared bearer secret, read once at import time. When it is empty,
# verify_auth() skips the authorization check entirely.
SERVICE_SECRET = os.environ.get("DOCUMENT_PARSER_SERVICE_SECRET", "").strip()

app = FastAPI(title="Fresh Catch Document Parser", version="1.0.0")
app.add_middleware(
    CORSMiddleware,
    # Comma-separated list of allowed origins. Entries are stripped so that
    # "https://a.example, https://b.example" matches the browser's Origin
    # header exactly; blank entries are dropped.
    # NOTE(review): the default is "*" while allow_credentials=True below --
    # the FastAPI CORS docs advise against combining wildcards with
    # credentials; confirm this is intended for production deployments.
    allow_origins=[
        origin.strip()
        for origin in os.environ.get("DOCUMENT_PARSER_CORS_ORIGINS", "*").split(",")
        if origin.strip()
    ],
    allow_credentials=True,
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
)
def verify_auth(authorization: str | None) -> None:
    """Reject the request unless it carries the configured bearer secret.

    When ``SERVICE_SECRET`` is empty the check is skipped and every request
    is accepted.

    Args:
        authorization: Raw ``Authorization`` header value, or ``None`` when
            the header was not sent.

    Raises:
        HTTPException: 401 when a secret is configured and the header is
            missing or does not equal ``"Bearer <secret>"`` exactly.
    """
    import hmac

    if not SERVICE_SECRET:
        return

    expected = f"Bearer {SERVICE_SECRET}".encode("utf-8")
    supplied = (authorization or "").encode("utf-8")
    # Constant-time comparison so response timing does not reveal how much of
    # the secret matched. Bytes are compared because compare_digest() rejects
    # non-ASCII str arguments.
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
def health() -> dict[str, str]:
    """Return a static payload reporting that the service is up."""
    # NOTE(review): no route decorator is visible on this function in this
    # view of the file; confirm it is registered on `app`.
    payload = dict(status="ok")
    return payload
async def parse_document(
    file: UploadFile = File(...),
    type: str = Query("auto", pattern="^(auto|invoice|receipt)$"),
    authorization: str | None = Header(default=None),
) -> dict:
    """Parse an uploaded document by running the vendor parser script.

    The upload is written to a temporary file, ``PARSE_SCRIPT`` is run on it
    with the current Python interpreter, and the script's stdout is decoded
    as JSON and returned.

    NOTE(review): no route decorator is visible on this function in this view
    of the file; confirm it is registered on ``app``.

    Args:
        file: The uploaded document.
        type: Parser mode, one of ``auto``, ``invoice`` or ``receipt``
            (enforced by the query pattern).
        authorization: ``Authorization`` header, checked by ``verify_auth``.

    Returns:
        The JSON value printed by the parser script.

    Raises:
        HTTPException: 401 from ``verify_auth``; 400 for an empty upload;
            504 when the parser exceeds the configured timeout; 500 when the
            script is missing, exits non-zero (detail truncated to 2000
            characters), or prints invalid JSON.
    """
    import asyncio
    import json

    verify_auth(authorization)
    if not PARSE_SCRIPT.exists():
        raise HTTPException(status_code=500, detail="parse_vendor_document.py not found")

    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Empty file")

    # Keep the upload's extension on the temp file; fall back to ".png" when
    # the filename is missing or has no extension.
    suffix = Path(file.filename or "upload.png").suffix or ".png"

    try:
        # The parser can run for a long time; run it on a worker thread so
        # the event loop keeps serving other requests in the meantime.
        completed = await asyncio.to_thread(
            _run_parser, contents, suffix, type, _parser_timeout_seconds()
        )
    except subprocess.TimeoutExpired as error:
        raise HTTPException(status_code=504, detail="Parse timed out") from error

    if completed.returncode != 0:
        detail = (completed.stderr or completed.stdout or "Parse failed").strip()
        raise HTTPException(status_code=500, detail=detail[:2000])

    try:
        return json.loads(completed.stdout)
    except json.JSONDecodeError as error:
        raise HTTPException(status_code=500, detail=f"Invalid parser output: {error}") from error


def _parser_timeout_seconds() -> float:
    """Return the parser timeout in seconds from DOCUMENT_PARSER_TIMEOUT_MS."""
    default_ms = 120000
    raw = os.environ.get("DOCUMENT_PARSER_TIMEOUT_MS", str(default_ms))
    try:
        timeout_ms = int(raw)
    except ValueError:
        timeout_ms = default_ms
    if timeout_ms <= 0:
        timeout_ms = default_ms
    # True division keeps sub-second values; floor division would turn them
    # into 0 and make every parse time out immediately.
    return timeout_ms / 1000


def _run_parser(
    contents: bytes, suffix: str, doc_type: str, timeout: float
) -> subprocess.CompletedProcess:
    """Write *contents* to a temp file, run the parser on it, then delete the file."""
    # delete=False so the file can be closed before the subprocess opens it;
    # it is removed explicitly in the finally block below.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(contents)
        image_path = tmp.name
    try:
        return subprocess.run(
            [sys.executable, str(PARSE_SCRIPT), "--image", image_path, "--type", doc_type],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=str(REPO_ROOT),
        )
    finally:
        Path(image_path).unlink(missing_ok=True)