"""FastAPI application exposing a PDF classification API and serving the built frontend."""

from pathlib import Path

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware

from app.pdf_processor import extract_text_from_pdf
from app.classifier import get_classifier

app = FastAPI()

# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive -- confirm this is intended beyond local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level cache for the classifier; populated by get_classifier_instance().
classifier = None


def get_classifier_instance():
    """Return the shared classifier, creating it via get_classifier() on first use."""
    global classifier
    if classifier is None:
        classifier = get_classifier()
    return classifier


@app.on_event("startup")
async def startup_event():
    """Try to load the classifier at startup so the first request does not pay for it.

    A failure here is reported but not fatal: the classifier is then loaded
    lazily by the first request that needs it.
    """
    print("Preloading classifier on startup...")
    try:
        get_classifier_instance()
        print("✅ Classifier loaded and ready!")
    except Exception as e:
        # Deliberate best-effort: keep the app booting even if preloading fails.
        print(f"⚠️ Warning: Could not preload classifier: {e}")
        print("Classifier will be loaded on first request.")


# ---- API ----
@app.get("/api/health")
def health():
    """Liveness probe; always reports ``{"status": "ok"}``."""
    return {"status": "ok"}


@app.get("/api/hello")
def hello():
    """Return a fixed greeting payload."""
    return {"message": "Hello from FastAPI"}


@app.post("/api/classify")
async def classify_document(file: UploadFile = File(...)):
    """Classify an uploaded PDF document.

    Args:
        file: Uploaded PDF file.

    Returns:
        JSON response with ``success``, the uploaded ``filename``, the
        ``classification`` result produced by the classifier, and
        ``text_length`` (length of the extracted text).

    Raises:
        HTTPException: 400 if the upload has no ``.pdf`` filename or no text
            could be extracted; 500 if extraction or classification fails
            for any other reason.
    """
    # The filename can be absent on an upload part; treat that the same as a
    # non-PDF name instead of crashing on None.lower().
    upload_name = file.filename or ""
    if not upload_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    try:
        contents = await file.read()

        # NOTE(review): extraction and classification run synchronously inside
        # this async handler; if they are slow they will hold up the event
        # loop -- consider offloading once thread-safety is confirmed.
        text = extract_text_from_pdf(contents)
        if not text:
            raise HTTPException(
                status_code=400,
                detail="Could not extract text from PDF. The file might be empty, corrupted, or image-based."
            )

        classifier_instance = get_classifier_instance()
        result = classifier_instance.classify_document(text)

        return JSONResponse(content={
            "success": True,
            "filename": file.filename,
            "classification": result,
            "text_length": len(text)
        })
    except HTTPException:
        # Let deliberate HTTP errors (such as the 400 above) pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error processing document: {str(e)}",
        ) from e


# ---- Frontend static serving ----
FRONTEND_DIST = Path(__file__).resolve().parents[2] / "frontend" / "dist"
INDEX_FILE = FRONTEND_DIST / "index.html"

if FRONTEND_DIST.exists():
    # Resolved once so the containment check below compares canonical paths.
    _DIST_ROOT = FRONTEND_DIST.resolve()

    # SPA fallback: any non-/api GET should return a built asset if one exists,
    # otherwise the React index.html. It must be registered BEFORE the "/"
    # mount below: routes are matched in registration order and a mount at "/"
    # matches every path, which would make this handler unreachable.
    @app.get("/{full_path:path}")
    def spa_fallback(full_path: str):
        """Serve a frontend asset, or index.html for client-side routes.

        Args:
            full_path: Request path without the leading slash.

        Returns:
            The matching file under the frontend build directory when there
            is one, otherwise the frontend's index.html.

        Raises:
            HTTPException: 404 for unmatched paths under ``api/``.
        """
        if full_path.startswith("api/"):
            # Unknown API routes must be a real 404, not a 200 carrying an error body.
            raise HTTPException(status_code=404, detail="Not Found")

        candidate = (_DIST_ROOT / full_path).resolve()
        if candidate.is_dir():
            candidate = candidate / "index.html"
        # Only serve files that really live inside the build directory
        # (guards against ".." path traversal).
        if _DIST_ROOT in candidate.parents and candidate.is_file():
            return FileResponse(str(candidate))
        return FileResponse(str(INDEX_FILE))

    # Kept (after the fallback) so the "static" mount name and handling of
    # non-GET methods by StaticFiles remain available.
    app.mount("/", StaticFiles(directory=str(FRONTEND_DIST), html=True), name="static")