""" FastAPI web service for document text extraction. Provides REST API endpoints for uploading and processing documents. """ from fastapi import FastAPI, File, UploadFile, HTTPException, Form from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse, JSONResponse from fastapi.staticfiles import StaticFiles import uvicorn import tempfile import os import json from pathlib import Path from typing import List, Optional, Dict, Any import shutil from src.inference import DocumentInference # Initialize FastAPI app app = FastAPI( title="Document Text Extraction API", description="Extract structured information from documents using Small Language Model (SLM)", version="1.0.0" ) # Add CORS middleware app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Global inference pipeline inference_pipeline: Optional[DocumentInference] = None def get_inference_pipeline() -> DocumentInference: """Get or initialize the inference pipeline.""" global inference_pipeline if inference_pipeline is None: model_path = "models/document_ner_model" if not Path(model_path).exists(): raise HTTPException( status_code=503, detail="Model not found. Please train the model first by running training_pipeline.py" ) try: inference_pipeline = DocumentInference(model_path) except Exception as e: raise HTTPException( status_code=503, detail=f"Failed to load model: {str(e)}" ) return inference_pipeline @app.on_event("startup") async def startup_event(): """Initialize the model on startup.""" try: get_inference_pipeline() print("Model loaded successfully on startup") except Exception as e: print(f"Failed to load model on startup: {e}") print("Model will be loaded on first request") @app.get("/", response_class=HTMLResponse) async def root(): """Serve the main HTML interface.""" html_content = """
Extract structured information from documents using AI