Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.responses import FileResponse, JSONResponse | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pathlib import Path | |
| from app.pdf_processor import extract_text_from_pdf | |
| from app.classifier import get_classifier | |
# Application instance that the rest of this module configures.
app = FastAPI()

# CORS policy: any origin, method and header may call the API.
# Credentials are disabled on purpose: the CORS spec does not allow credentialed
# requests against wildcard origins/methods/headers, and the FastAPI docs
# require all three to be listed explicitly when allow_credentials=True.
# If cookies or Authorization headers must cross origins, replace the "*"
# entries with explicit values and only then set allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Module-level cache for the classifier; stays None until first use.
classifier = None


def get_classifier_instance():
    """Return the shared classifier, building it via get_classifier() on first call."""
    global classifier
    # Fast path: a previous call already created the instance.
    if classifier is not None:
        return classifier
    classifier = get_classifier()
    return classifier
async def startup_event():
    """Warm up the classifier so the first request does not pay the load cost.

    Failure to load is reported on stdout and otherwise ignored; the
    classifier is then created lazily by the first call that needs it.
    """
    # NOTE(review): no startup-hook registration is visible for this function
    # in this view - confirm it is actually wired to the app.
    print("Preloading classifier on startup...")
    try:
        get_classifier_instance()
        print("✅ Classifier loaded and ready!")
    except Exception as load_error:
        # Best effort only: the app must still start if preloading fails.
        print(f"⚠️ Warning: Could not preload classifier: {load_error}")
        print("Classifier will be loaded on first request.")
| # ---- API ---- | |
def health():
    """Return a fixed payload signalling that the service is up."""
    payload = {"status": "ok"}
    return payload
def hello():
    """Return a fixed greeting payload."""
    greeting = {"message": "Hello from FastAPI"}
    return greeting
async def classify_document(file: UploadFile = File(...)):
    """
    Classify an uploaded PDF document.

    Args:
        file: Uploaded PDF file.

    Returns:
        A JSONResponse whose body contains ``success`` (always True),
        ``filename`` (as supplied by the client), ``classification`` (the value
        returned by the classifier's ``classify_document``) and ``text_length``
        (length of the extracted text).

    Raises:
        HTTPException: 400 if the upload has no filename ending in ``.pdf`` or
            if no text could be extracted; 500 for any other failure while
            reading, extracting or classifying.
    """
    # The filename is client-supplied and may be missing. Treat a missing name
    # like a non-PDF upload instead of failing on ``None.lower()``, which would
    # surface as an unhandled 500.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    try:
        # Read the whole upload into memory.
        contents = await file.read()

        # NOTE(review): extraction and classification run synchronously inside
        # this coroutine; if they are slow, consider moving them to a threadpool.
        text = extract_text_from_pdf(contents)
        if not text:
            raise HTTPException(
                status_code=400,
                detail="Could not extract text from PDF. The file might be empty, corrupted, or image-based."
            )

        classifier_instance = get_classifier_instance()
        result = classifier_instance.classify_document(text)

        return JSONResponse(content={
            "success": True,
            "filename": file.filename,
            "classification": result,
            "text_length": len(text)
        })
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through unchanged.
        raise
    except Exception as exc:
        # Anything else becomes a 500; keep the cause chained for tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing document: {exc}",
        ) from exc
| # ---- Frontend static serving ---- | |
# Built frontend bundle: <two directories above this file's directory>/frontend/dist.
FRONTEND_DIST = Path(__file__).resolve().parents[2].joinpath("frontend", "dist")
# Entry page of the built frontend, returned by the SPA fallback.
INDEX_FILE = FRONTEND_DIST.joinpath("index.html")

# Serve the bundle only when it has been built, so the API still starts without it.
# NOTE(review): a mount at "/" presumably matches every remaining path, so routes
# registered after this point may never be reached - confirm the intended order.
if FRONTEND_DIST.exists():
    app.mount(
        "/",
        StaticFiles(directory=str(FRONTEND_DIST), html=True),
        name="static",
    )
| # SPA fallback: any non-/api route should return React index.html | |
def spa_fallback(full_path: str):
    """Serve the frontend's index.html for any path that is not an API path.

    Args:
        full_path: Requested path without a leading slash (the check below
            compares against the prefix ``"api/"``).

    Returns:
        A FileResponse for ``INDEX_FILE``.

    Raises:
        HTTPException: 404 when ``full_path`` starts with ``"api/"``, so that
            unknown API paths are reported as errors, not as the HTML page.
    """
    # NOTE(review): no route registration is visible for this function in this
    # view - confirm it is attached to a catch-all path.
    if full_path.startswith("api/"):
        # Returning a dict here would be sent with status 200; raise so the
        # client receives a real 404 with the same {"detail": "Not Found"} body.
        raise HTTPException(status_code=404, detail="Not Found")
    return FileResponse(str(INDEX_FILE))