Spaces:
Sleeping
Sleeping
File size: 2,653 Bytes
fbba60e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | """
Hosted document parser API for Fresh Catch Inventory.
Deploy to Hugging Face Spaces (Docker), Fly.io, or any VM with Python + Tesseract.
"""
from __future__ import annotations
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from fastapi import FastAPI, File, Header, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
# Repository root: two levels above the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Parser script that the /parse endpoint runs as a subprocess.
PARSE_SCRIPT = REPO_ROOT / "scripts" / "parse_vendor_document.py"
# Shared bearer secret; when empty, verify_auth() lets every request through.
SERVICE_SECRET = os.environ.get("DOCUMENT_PARSER_SERVICE_SECRET", "").strip()

# Comma-separated origin allow-list. Entries are trimmed and blanks dropped so
# that a value such as "https://a.example, https://b.example" matches both
# origins (an untrimmed " https://b.example" would never equal a browser's
# Origin header).
_CORS_ORIGINS = [
    origin.strip()
    for origin in os.environ.get("DOCUMENT_PARSER_CORS_ORIGINS", "*").split(",")
    if origin.strip()
]

app = FastAPI(title="Fresh Catch Document Parser", version="1.0.0")
# NOTE(review): the default wildcard origin is combined with
# allow_credentials=True; confirm this is intended, or set
# DOCUMENT_PARSER_CORS_ORIGINS to an explicit list in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
)
def verify_auth(authorization: str | None) -> None:
    """Reject the request unless it carries the configured bearer secret.

    Authentication is skipped entirely when ``SERVICE_SECRET`` is empty.

    Args:
        authorization: Raw ``Authorization`` header value, or ``None`` when
            the header was not sent.

    Raises:
        HTTPException: 401 when a secret is configured and the header is
            missing or is not exactly ``Bearer <secret>``.
    """
    import hmac

    if not SERVICE_SECRET:
        return
    expected = f"Bearer {SERVICE_SECRET}".encode("utf-8")
    supplied = (authorization or "").encode("utf-8")
    # Constant-time comparison so response timing does not reveal how much of
    # the secret a guess got right. Bytes are compared because
    # hmac.compare_digest rejects non-ASCII str arguments.
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Unauthorized")
@app.get("/health")
def health() -> dict[str, str]:
    """Liveness probe: return a fixed payload showing the service is up."""
    payload = {"status": "ok"}
    return payload
@app.post("/parse")
async def parse_document(
    file: UploadFile = File(...),
    type: str = Query("auto", pattern="^(auto|invoice|receipt)$"),
    authorization: str | None = Header(default=None),
) -> dict:
    """Run the vendor-document parser script on an uploaded file.

    The upload is written to a temporary file, ``PARSE_SCRIPT`` is executed
    on it in a subprocess, and the script's stdout is returned as parsed JSON.

    Args:
        file: Uploaded document; its filename extension is reused as the
            temporary file's suffix (``.png`` when there is none).
        type: Document kind passed to the script via ``--type``; one of
            ``auto``, ``invoice`` or ``receipt``. The name shadows the
            builtin but is part of the public query interface.
        authorization: ``Authorization`` header, checked by ``verify_auth``.

    Returns:
        The JSON value the parser script printed to stdout.

    Raises:
        HTTPException: 401 on failed authentication; 400 for an empty upload;
            504 when the parser exceeds ``DOCUMENT_PARSER_TIMEOUT_MS``; 500
            when the script is missing, exits non-zero, or prints invalid
            JSON.
    """
    import asyncio
    import json

    verify_auth(authorization)
    if not PARSE_SCRIPT.exists():
        raise HTTPException(status_code=500, detail="parse_vendor_document.py not found")

    contents = await file.read()
    if not contents:
        raise HTTPException(status_code=400, detail="Empty file")

    suffix = Path(file.filename or "upload.png").suffix or ".png"
    # True division keeps sub-second precision; floor division would turn any
    # configured value below 1000 ms into a timeout of 0.
    timeout_s = int(os.environ.get("DOCUMENT_PARSER_TIMEOUT_MS", "120000")) / 1000

    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    image_path = tmp.name
    try:
        # The write happens inside the try so a failed write (e.g. disk full)
        # still removes the temporary file in the finally clause.
        with tmp:
            tmp.write(contents)
        # subprocess.run blocks; run it in a worker thread so the event loop
        # keeps serving other requests (including /health) while parsing.
        completed = await asyncio.to_thread(
            subprocess.run,
            [sys.executable, str(PARSE_SCRIPT), "--image", image_path, "--type", type],
            capture_output=True,
            text=True,
            timeout=timeout_s,
            cwd=str(REPO_ROOT),
        )
    except subprocess.TimeoutExpired as error:
        raise HTTPException(
            status_code=504,
            detail=f"Parser timed out after {timeout_s:g} seconds",
        ) from error
    finally:
        Path(image_path).unlink(missing_ok=True)

    if completed.returncode != 0:
        detail = (completed.stderr or completed.stdout or "Parse failed").strip()
        # Cap the echoed parser output so the error response stays small.
        raise HTTPException(status_code=500, detail=detail[:2000])

    try:
        return json.loads(completed.stdout)
    except json.JSONDecodeError as error:
        raise HTTPException(status_code=500, detail=f"Invalid parser output: {error}") from error
|