"""Main FastAPI application entry point."""

import logging
import threading
from typing import Dict

from fastapi import FastAPI, status
from fastapi.responses import JSONResponse

from app import __version__
from app.config import settings
from app.middleware import api_key_guard
from app.middleware.rate_limit import rate_limit_middleware
from app.routers import openai_api

# Configure logging with the level named in settings. getattr() alone is not a
# sufficient check: a value like "getLogger" is a real attribute of the logging
# module but not a level, so we also require the resolved value to be an int
# before handing it to basicConfig.
log_level = getattr(logging, str(settings.log_level).upper(), None)
if not isinstance(log_level, int):
    print(f"Warning: Invalid log level '{settings.log_level}'. Falling back to INFO.")
    log_level = logging.INFO

logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="LLM Pro Finance API (Transformers)",
    description="OpenAI-compatible API for financial LLM inference",
    version=__version__
)

# Mount routers
app.include_router(openai_api.router, prefix="/v1")

# NOTE(review): Starlette wraps "http" middleware so that the one registered
# LAST is outermost and runs FIRST on each request. With the order below,
# api_key_guard executes before rate_limit_middleware. The original comment
# claimed rate limiting ran first — confirm which order is actually intended.
app.middleware("http")(rate_limit_middleware)
app.middleware("http")(api_key_guard)


# TODO: migrate to FastAPI lifespan handlers; on_event is deprecated in recent FastAPI.
@app.on_event("startup")
async def startup_event() -> None:
    """Startup event - initialize model in background thread.

    Loads the model asynchronously to avoid blocking the API startup.
    Model loading happens in a daemon thread so it doesn't prevent shutdown.
    """
    logger.info("Starting LLM Pro Finance API...")

    force_reload = settings.force_model_reload
    if force_reload:
        logger.info("Force model reload enabled (FORCE_MODEL_RELOAD=true)")

    logger.info("Initializing model in background thread...")

    def load_model() -> None:
        """Load the model in a background thread."""
        # Imported lazily so the (heavy) model stack is only pulled in here.
        from app.providers.transformers_provider import initialize_model
        initialize_model(force_reload=force_reload)

    # Daemon thread: model loading must never block process shutdown.
    thread = threading.Thread(target=load_model, daemon=True)
    thread.start()
    logger.info("Model initialization started in background")


@app.get("/")
async def root() -> Dict[str, str]:
    """Root endpoint returning API status and information.

    Returns:
        Dictionary containing API status, service name, version, model, and backend.
    """
    return {
        "status": "ok",
        "service": "Qwen Open Finance R 8B Inference",
        "version": __version__,
        "model": settings.model,
        "backend": "Transformers"
    }


@app.get("/health")
async def health() -> Dict[str, str]:
    """Liveness check endpoint for monitoring and load balancers.

    Returns:
        Dictionary indicating the service is alive.
    """
    return {"status": "service alive", "service": "LLM Pro Finance API"}


@app.get("/ready")
async def ready() -> JSONResponse:
    """Readiness check endpoint for orchestrators and load balancers.

    Checks if the model is loaded and ready to handle requests.
    Returns 503 Service Unavailable if the model is not ready.

    Returns:
        JSONResponse with ready/model_loaded fields and appropriate status code.
    """
    # Lazy import keeps module import light and mirrors startup_event's pattern.
    from app.providers.transformers_provider import is_model_ready

    model_loaded = is_model_ready()
    response_data = {
        "ready": model_loaded,
        "model_loaded": model_loaded,
        "service": "LLM Pro Finance API"
    }
    code = status.HTTP_200_OK if model_loaded else status.HTTP_503_SERVICE_UNAVAILABLE
    return JSONResponse(content=response_data, status_code=code)