File size: 3,203 Bytes
f6e574f
 
25bda12
 
 
f6e574f
 
25bda12
 
 
 
 
 
 
 
 
 
 
f6e574f
 
 
 
 
 
 
 
 
5913b23
 
 
 
 
 
 
 
 
 
 
f6e574f
25bda12
 
 
 
 
 
 
 
 
f6e574f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25bda12
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pathlib import Path
from app.pdf_processor import extract_text_from_pdf
from app.classifier import get_classifier

# Single FastAPI application instance; the routes and middleware below attach to it.
app = FastAPI()

# Permissive CORS: every origin, method, and header is accepted.
# NOTE(review): the FastAPI CORS docs state that wildcard origins/methods/headers
# should not be combined with allow_credentials=True — confirm whether
# credentialed requests are actually needed and, if so, list origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level cache for the classifier; stays None until first requested.
classifier = None

def get_classifier_instance():
    """Return the shared classifier, building it via get_classifier() on first use."""
    global classifier
    if classifier is not None:
        return classifier
    classifier = get_classifier()
    return classifier

@app.on_event("startup")
async def startup_event():
    """Warm up the classifier when the application starts.

    A failure here is tolerated: a warning is printed and loading is left
    to the first call that needs the classifier.
    """
    print("Preloading classifier on startup...")
    try:
        get_classifier_instance()
        print("✅ Classifier loaded and ready!")
    except Exception as exc:
        # Best-effort warm-up: report the problem but let startup continue.
        warning_lines = (
            f"⚠️  Warning: Could not preload classifier: {exc}",
            "Classifier will be loaded on first request.",
        )
        for line in warning_lines:
            print(line)

# ---- API ----
@app.get("/api/health")
def health():
    """Liveness probe: report a static OK status."""
    payload = {"status": "ok"}
    return payload

@app.get("/api/hello")
def hello():
    """Return a fixed greeting message."""
    greeting = "Hello from FastAPI"
    return {"message": greeting}

@app.post("/api/classify")
async def classify_document(file: UploadFile = File(...)):
    """
    Classify an uploaded PDF document.

    Args:
        file: Uploaded PDF file.

    Returns:
        JSONResponse with ``success``, ``filename``, ``classification``
        (the classifier's result, passed through as-is) and ``text_length``.

    Raises:
        HTTPException: 400 if the upload has no ``.pdf`` filename, is empty,
            or yields no extractable text; 500 for any other failure while
            processing the document.
    """
    # The filename can be missing on an upload; treat that as "not a PDF"
    # instead of failing with AttributeError on None.lower().
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    try:
        contents = await file.read()

        # Reject zero-byte uploads up front rather than handing them to the parser.
        if not contents:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        text = extract_text_from_pdf(contents)
        if not text:
            raise HTTPException(
                status_code=400,
                detail="Could not extract text from PDF. The file might be empty, corrupted, or image-based."
            )

        classifier_instance = get_classifier_instance()
        result = classifier_instance.classify_document(text)

        return JSONResponse(content={
            "success": True,
            "filename": filename,
            "classification": result,
            "text_length": len(text)
        })

    except HTTPException:
        # Deliberate 4xx responses raised above must pass through unchanged.
        raise
    except Exception as e:
        # Keep the original exception chained so server logs show the root cause.
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}") from e

# ---- Frontend static serving ----
FRONTEND_DIST = Path(__file__).resolve().parents[2] / "frontend" / "dist"
INDEX_FILE = FRONTEND_DIST / "index.html"

if FRONTEND_DIST.exists():
    # A StaticFiles mount at "/" matches every request path, so a catch-all
    # route registered after it is never reached and client-side routes would
    # get a 404. A single catch-all route therefore serves both the built
    # assets and the SPA fallback. It is registered last so the /api routes
    # declared earlier keep priority.
    _FRONTEND_ROOT = FRONTEND_DIST.resolve()

    @app.get("/{full_path:path}")
    def spa_fallback(full_path: str):
        """
        Serve a built frontend file, or index.html for client-side routes.

        Args:
            full_path: Request path with the leading slash removed.

        Returns:
            FileResponse for the matching file under the frontend build
            directory, otherwise FileResponse for index.html.

        Raises:
            HTTPException: 404 for unknown ``api`` paths, for paths that
                resolve outside the build directory, or when index.html is
                missing.
        """
        # Unknown API paths must be a real 404, not a 200 JSON body or the SPA shell.
        if full_path == "api" or full_path.startswith("api/"):
            raise HTTPException(status_code=404, detail="Not Found")

        try:
            candidate = (_FRONTEND_ROOT / full_path).resolve()
            # Refuse anything that escapes the build directory (e.g. "../").
            candidate.relative_to(_FRONTEND_ROOT)
        except (ValueError, OSError):
            raise HTTPException(status_code=404, detail="Not Found") from None

        if candidate.is_file():
            return FileResponse(str(candidate))

        if not INDEX_FILE.is_file():
            raise HTTPException(status_code=404, detail="Not Found")
        return FileResponse(str(INDEX_FILE))