Spaces:
Sleeping
Sleeping
File size: 3,203 Bytes
f6e574f 25bda12 f6e574f 25bda12 f6e574f 5913b23 f6e574f 25bda12 f6e574f 25bda12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from app.pdf_processor import extract_text_from_pdf
from app.classifier import get_classifier
app = FastAPI()

# Cross-origin policy: every origin, method and header is accepted, and
# credentialed requests are enabled.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive setup -- confirm it is intended for this deployment.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
# Module-level cache for the classifier; stays None until the first load.
classifier = None


def get_classifier_instance():
    """Return the shared classifier, creating it on first use.

    The instance is built by ``get_classifier()`` the first time this function
    runs and is stored in the module-level ``classifier`` variable, so later
    calls return the same object.
    """
    global classifier
    if classifier is not None:
        return classifier
    classifier = get_classifier()
    return classifier
@app.on_event("startup")
async def startup_event():
    """Warm up the classifier at application startup.

    Loading it here keeps the first request from paying the loading cost.
    A failure is reported on stdout and otherwise ignored, so startup
    continues and loading is attempted again on the first request.
    """
    print("Preloading classifier on startup...")
    try:
        get_classifier_instance()
        print("✅ Classifier loaded and ready!")
    except Exception as exc:
        # Best effort only: never block startup on a failed preload.
        print(f"⚠️ Warning: Could not preload classifier: {exc}")
        print("Classifier will be loaded on first request.")
# ---- API ----
@app.get("/api/health")
def health():
    """Health-check endpoint; always answers with a fixed "ok" status."""
    payload = {"status": "ok"}
    return payload
@app.get("/api/hello")
def hello():
    """Return a fixed greeting message."""
    greeting = {"message": "Hello from FastAPI"}
    return greeting
@app.post("/api/classify")
async def classify_document(file: UploadFile = File(...)):
    """
    Classify an uploaded PDF document.

    Args:
        file: Uploaded PDF file; its name must end with ".pdf"
            (case-insensitive).

    Returns:
        JSON response with "success", the uploaded "filename", the
        "classification" returned by the classifier, and "text_length"
        (``len()`` of the extracted text).

    Raises:
        HTTPException: 400 when the upload has no ".pdf" filename or when no
            text could be extracted; 500 when reading, extraction or
            classification fails with any other error.
    """
    # The filename of an upload is optional; treat a missing one like any
    # other non-PDF name instead of failing on ``None.lower()``.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    try:
        # Read file content
        contents = await file.read()

        # Extract text from PDF
        text = extract_text_from_pdf(contents)
        if not text:
            raise HTTPException(
                status_code=400,
                detail="Could not extract text from PDF. The file might be empty, corrupted, or image-based."
            )

        # Classify the document
        classifier_instance = get_classifier_instance()
        result = classifier_instance.classify_document(text)

        return JSONResponse(content={
            "success": True,
            "filename": file.filename,
            "classification": result,
            "text_length": len(text)
        })
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) pass through untouched.
        raise
    except Exception as e:
        # Anything else becomes a 500; keep the original error as the cause.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing document: {e}",
        ) from e
# ---- Frontend static serving ----
FRONTEND_DIST = Path(__file__).resolve().parents[2] / "frontend" / "dist"
INDEX_FILE = FRONTEND_DIST / "index.html"


# SPA fallback: any non-/api route should return React index.html.
# This route is registered BEFORE the "/" mount below on purpose: a mount at
# "/" matches every path, so a catch-all route declared after it is never
# reached and client-side routes would get a 404 from StaticFiles.
@app.get("/{full_path:path}")
def spa_fallback(full_path: str):
    """Serve a built frontend file, or index.html for client-side routes.

    Args:
        full_path: Request path without the leading slash.

    Returns:
        The file under FRONTEND_DIST that matches ``full_path`` when one
        exists, otherwise INDEX_FILE.

    Raises:
        HTTPException: 404 for paths under "api/" (unknown API routes must not
            be answered with HTML) and when INDEX_FILE does not exist.
    """
    if full_path.startswith("api/"):
        raise HTTPException(status_code=404, detail="Not Found")

    dist_root = FRONTEND_DIST.resolve()
    try:
        requested = (dist_root / full_path).resolve()
        # Only serve regular files located inside the dist directory; this
        # rejects "../" traversal and absolute paths.
        is_asset = dist_root in requested.parents and requested.is_file()
    except (OSError, ValueError):
        # Unusable path (e.g. embedded NUL byte): treat it as a client route.
        is_asset = False
    if is_asset:
        return FileResponse(str(requested))

    if not INDEX_FILE.is_file():
        raise HTTPException(status_code=404, detail="Not Found")
    return FileResponse(str(INDEX_FILE))


if FRONTEND_DIST.exists():
    # Kept under its original name; it now sits after the catch-all GET route,
    # so it only receives requests that route does not fully match.
    app.mount("/", StaticFiles(directory=str(FRONTEND_DIST), html=True), name="static")
|