"""FastAPI application entry point.

Builds the ``app`` object, mounts the chat, upload and health routers,
warms up the model/vector-store loaders during startup, and installs a
catch-all exception handler that answers with a JSON 500.
"""

import traceback
from contextlib import asynccontextmanager
from typing import AsyncIterator, Callable

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from routes.chat import router as chat_router
from routes.upload import router as upload_router
from routes.health import router as health_router
from utils.logger import logger
from llm.model_loader import get_llm
from embeddings.embedding_model import get_embedding_model
from embeddings.vector_store import load_vector_store


def _eager_load(label: str, loader: Callable[[], object]) -> None:
    """Call ``loader`` once, logging the attempt, success, or failure.

    Args:
        label: Human-readable resource name as it appears mid-sentence in
            the log messages (e.g. ``"vector store"``).
        loader: Zero-argument callable to invoke; its return value is
            discarded.

    Any ``Exception`` raised by ``loader`` is logged and swallowed on
    purpose: startup is best-effort and must not abort the application.
    """
    logger.info(f"Eagerly loading {label} on startup...")
    try:
        loader()
        # Only the first character is upper-cased so that labels such as
        # "LLM model" keep their existing capitalisation.
        logger.info(f"{label[:1].upper()}{label[1:]} loaded successfully!")
    except Exception as e:
        logger.error(f"Error loading {label} on startup: {e}")


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Application lifespan hook: warm up heavy resources before serving.

    Calls ``get_llm()``, ``get_embedding_model()`` and
    ``load_vector_store()`` in that order. A failure in any of them is
    logged and startup continues. Nothing runs after the ``yield``, so
    there is no shutdown work.
    """
    # Eagerly load models on startup
    _eager_load("LLM model", get_llm)
    _eager_load("embedding model", get_embedding_model)
    _eager_load("vector store", load_vector_store)
    yield


app = FastAPI(title="AI Assistant", lifespan=lifespan)

app.include_router(chat_router)
app.include_router(upload_router)
app.include_router(health_router)


# Global fallback: convert unhandled exceptions to JSON 500 with detail
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Log an unhandled exception and return it as a JSON 500 response.

    Args:
        request: The request being processed when the exception escaped
            (unused here, required by the handler signature).
        exc: The exception that was not handled elsewhere.

    Returns:
        A ``JSONResponse`` with status 500 whose body contains the
        exception class name under ``"error"`` and ``str(exc)`` under
        ``"detail"``.
    """
    # Build the traceback from the exception object itself instead of the
    # interpreter's "currently handled exception" state, so the log is
    # correct no matter where this handler is invoked from.
    tb_text = "".join(
        traceback.format_exception(type(exc), exc, exc.__traceback__)
    )
    logger.error(f"Unhandled exception: {exc}\n{tb_text}")

    # NOTE(review): str(exc) is sent to the client verbatim; confirm that
    # exposing internal error messages is acceptable for this deployment.
    return JSONResponse(
        status_code=500,
        content={
            "error": type(exc).__name__,
            "detail": str(exc),
        },
    )


@app.get("/")
def root() -> dict:
    """Return a fixed JSON message for GET ``/``."""
    return {"message": "AI Engine Running"}