Seth
Fix Dockerfile and add model preloading for HF Spaces
5913b23
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from app.pdf_processor import extract_text_from_pdf
from app.classifier import get_classifier
# Single application instance; all routes and middleware below attach to it.
app = FastAPI()

# CORS: the API is open to any origin. Credentialed cross-origin requests are
# turned off because a wildcard origin must not be combined with
# allow_credentials=True (browsers reject "*" for credentialed requests, and
# the FastAPI CORS docs require explicit origins in that case). Nothing in this
# module relies on cookies or auth headers. If credentials are ever needed,
# list the allowed origins explicitly instead of using "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Module-level cache for the classifier; stays None until it is first needed.
classifier = None


def get_classifier_instance():
    """Return the shared classifier, building it via get_classifier() on first use."""
    global classifier
    # Fast path: already built by an earlier call.
    if classifier is not None:
        return classifier
    classifier = get_classifier()
    return classifier
@app.on_event("startup")
async def startup_event():
    """Warm up the classifier at application startup.

    Best effort: any failure is reported on stdout and swallowed, so the
    application still starts and the classifier is built on the first request
    that needs it.
    """
    print("Preloading classifier on startup...")
    try:
        get_classifier_instance()
        print("✅ Classifier loaded and ready!")
    except Exception as e:
        # Deliberately broad: a failed preload must not abort startup.
        failure_notes = (
            f"⚠️ Warning: Could not preload classifier: {e}",
            "Classifier will be loaded on first request.",
        )
        for note in failure_notes:
            print(note)
# ---- API ----
@app.get("/api/health")
def health():
    """Liveness probe: always reports an "ok" status."""
    status_payload = {"status": "ok"}
    return status_payload
@app.get("/api/hello")
def hello():
    """Return a fixed greeting; handy as a connectivity smoke test."""
    greeting = {"message": "Hello from FastAPI"}
    return greeting
@app.post("/api/classify")
async def classify_document(file: UploadFile = File(...)):
    """
    Classify an uploaded PDF document.

    Args:
        file: Uploaded file; its name must end in ".pdf" (case-insensitive).

    Returns:
        JSONResponse with the keys "success", "filename", "classification"
        (whatever the classifier returned) and "text_length".

    Raises:
        HTTPException: 400 if the upload has no ".pdf" filename or no text
            could be extracted from it; 500 if reading, extraction or
            classification fails for any other reason.
    """
    # The filename is optional on an upload; treat a missing one like a
    # non-PDF (400) instead of crashing on ``None.lower()``.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    try:
        # Read the whole upload into memory.
        contents = await file.read()

        # Extract text from the PDF bytes.
        text = extract_text_from_pdf(contents)
        if not text:
            raise HTTPException(
                status_code=400,
                detail="Could not extract text from PDF. The file might be empty, corrupted, or image-based."
            )

        # Classify the extracted text with the shared classifier.
        classifier_instance = get_classifier_instance()
        result = classifier_instance.classify_document(text)

        return JSONResponse(content={
            "success": True,
            "filename": file.filename,
            "classification": result,
            "text_length": len(text)
        })
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) pass through untouched.
        raise
    except Exception as e:
        # Anything else becomes a 500; keep the cause chained for server logs.
        raise HTTPException(
            status_code=500, detail=f"Error processing document: {str(e)}"
        ) from e
# ---- Frontend static serving ----
FRONTEND_DIST = Path(__file__).resolve().parents[2] / "frontend" / "dist"
INDEX_FILE = FRONTEND_DIST / "index.html"


# SPA fallback: any non-/api route should return a built frontend file or,
# failing that, React's index.html so client-side routing can take over.
# A StaticFiles mount at "/" is intentionally not used: it matches every path,
# so a catch-all route registered after it would never run and deep links
# would get a 404. This route serves the static files itself instead.
@app.get("/{full_path:path}")
def spa_fallback(full_path: str):
    """Serve the built frontend for every path that is not an API route.

    Args:
        full_path: Request path without the leading slash.

    Returns:
        FileResponse for the matching file inside FRONTEND_DIST when one
        exists, otherwise for INDEX_FILE.

    Raises:
        HTTPException: 404 for unknown "api/" paths, and for any path when the
            frontend build (index.html) is not present.
    """
    if full_path.startswith("api/"):
        # Unknown API path: answer with a real 404 status, not a 200 body.
        raise HTTPException(status_code=404, detail="Not Found")

    try:
        dist_root = FRONTEND_DIST.resolve()
        requested = (dist_root / full_path).resolve()
        # Only serve regular files that really live under the dist directory;
        # this rejects "../" and absolute-path escapes.
        is_frontend_file = dist_root in requested.parents and requested.is_file()
    except (OSError, ValueError):
        # Malformed paths (e.g. embedded NUL bytes) are treated as "no such file".
        is_frontend_file = False

    if is_frontend_file:
        return FileResponse(str(requested))

    if not INDEX_FILE.is_file():
        # No frontend build available (e.g. API-only or development run).
        raise HTTPException(status_code=404, detail="Not Found")
    return FileResponse(str(INDEX_FILE))