"""Main FastAPI application entry point."""

import logging
import threading
from typing import Dict

from fastapi import FastAPI, status
from fastapi.responses import JSONResponse

from app import __version__
from app.config import settings
from app.middleware import api_key_guard
from app.middleware.rate_limit import rate_limit_middleware
from app.routers import openai_api

# Configure logging with level from settings
try:
    log_level = getattr(logging, settings.log_level.upper())
except AttributeError:
    print(f"Warning: Invalid log level '{settings.log_level}'. Falling back to INFO.")
    log_level = logging.INFO
logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="LLM Pro Finance API (Transformers)",
    description="OpenAI-compatible API for financial LLM inference",
    version=__version__
)

# Mount routers
app.include_router(openai_api.router, prefix="/v1")

# Rate limiting middleware. Note that Starlette runs the most recently
# registered middleware first, so the API key guard registered below is the
# outermost layer and executes before the rate limiter on each request.
app.middleware("http")(rate_limit_middleware)

# Optional API key middleware (outermost: runs before rate limiting)
app.middleware("http")(api_key_guard)


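# NOTE: recent FastAPI releases mark on_event() as deprecated in favor of
# lifespan handlers; on_event() remains functional and is used here.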
@app.on_event("startup")
async def startup_event() -> None:
    """Startup event - initialize model in background thread.
    
    Loads the model asynchronously to avoid blocking the API startup.
    Model loading happens in a daemon thread so it doesn't prevent shutdown.
    """
    logger.info("Starting LLM Pro Finance API...")
    
    force_reload = settings.force_model_reload
    if force_reload:
        logger.info("Force model reload enabled (FORCE_MODEL_RELOAD=true)")
    
    logger.info("Initializing model in background thread...")
    
    def load_model() -> None:
        """Load the model in a background thread."""
        from app.providers.transformers_provider import initialize_model
        initialize_model(force_reload=force_reload)
    
    # Start model loading in background thread
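    # Readiness is surfaced through the /ready endpoint, which checks is_model_ready().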
    thread = threading.Thread(target=load_model, daemon=True)
    thread.start()
    logger.info("Model initialization started in background")


@app.get("/")
async def root() -> Dict[str, str]:
    """Root endpoint returning API status and information.
    
    Returns:
        Dictionary containing API status, service name, version, model, and backend.
    """
    return {
        "status": "ok", 
        "service": "Qwen Open Finance R 8B Inference", 
        "version": __version__,
        "model": settings.model,
        "backend": "Transformers"
    }


@app.get("/health")
async def health() -> Dict[str, str]:
    """Liveness check endpoint for monitoring and load balancers.
    
    Returns:
        Dictionary indicating the service is alive.
    """
    return {"status": "service alive", "service": "LLM Pro Finance API"}


@app.get("/ready")
async def ready() -> JSONResponse:
    """Readiness check endpoint for orchestrators and load balancers.
    
    Checks if the model is loaded and ready to handle requests.
    Returns 503 Service Unavailable if the model is not ready.
    
    Returns:
        JSONResponse with ready/model_loaded fields and appropriate status code.
    """
    from app.providers.transformers_provider import is_model_ready
    
    model_loaded = is_model_ready()

    response_data = {
        "ready": model_loaded,
        "model_loaded": model_loaded,
        "service": "LLM Pro Finance API"
    }

    status_code = (
        status.HTTP_200_OK if model_loaded
        else status.HTTP_503_SERVICE_UNAVAILABLE
    )
    return JSONResponse(content=response_data, status_code=status_code)
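

# Optional local-run helper: a minimal sketch assuming uvicorn is installed and
# that this module is importable as "app.main" (i.e. the file lives at
# app/main.py), so the equivalent CLI invocation would be
# `uvicorn app.main:app --host 0.0.0.0 --port 8000`. Host and port are
# illustrative defaults, not settings taken from this project.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)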