File size: 3,650 Bytes
1e23279 f28306b 1e23279 9db586c 1e23279 dc14519 1e23279 6851411 dc14519 6541672 6851411 dc14519 7ee7723 dc14519 772dd21 6851411 1e23279 dc14519 1e23279 6851411 dc14519 6851411 1e23279 772dd21 1e23279 a750766 16c2a22 ee07ed2 1e23279 afd6869 16c2a22 ee07ed2 6851411 1e23279 6851411 f28306b 1e23279 772dd21 cc88da6 dc14519 1e23279 9c71bb7 772dd21 1e23279 772dd21 f28306b 9db586c 1e23279 9db586c 1e23279 9db586c 6851411 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
"""Main FastAPI application entry point."""
import logging
import threading
from typing import Dict
from fastapi import FastAPI, status
from fastapi.responses import JSONResponse
from app import __version__
from app.config import settings
from app.middleware import api_key_guard
from app.middleware.rate_limit import rate_limit_middleware
from app.routers import openai_api
# Configure logging with level from settings
# Configure logging with the level named in settings, falling back to INFO.
# A plain getattr would happily return ANY attribute of the logging module
# (e.g. the `logging.log` function), so validate that the resolved value is
# actually a numeric level before using it.
log_level = getattr(logging, settings.log_level.upper(), None)
if not isinstance(log_level, int):
    # Logging isn't configured yet at this point, so print() is the only
    # reliable channel for the warning.
    print(f"Warning: Invalid log level '{settings.log_level}'. Falling back to INFO.")
    log_level = logging.INFO
logging.basicConfig(level=log_level)
logger = logging.getLogger(__name__)
# FastAPI application instance; version comes from the package metadata.
app = FastAPI(
    title="LLM Pro Finance API (Transformers)",
    description="OpenAI-compatible API for financial LLM inference",
    version=__version__
)
# Mount the OpenAI-compatible routes under the /v1 prefix.
app.include_router(openai_api.router, prefix="/v1")
# Rate limiting middleware (applied first)
# NOTE(review): Starlette executes the most recently registered HTTP
# middleware first (outermost), so api_key_guard below would actually run
# BEFORE rate_limit_middleware — confirm "applied first" matches intent.
app.middleware("http")(rate_limit_middleware)
# Optional API key middleware
app.middleware("http")(api_key_guard)
@app.on_event("startup")
async def startup_event() -> None:
    """Startup event - initialize model in background thread.

    Loads the model asynchronously to avoid blocking the API startup.
    Model loading happens in a daemon thread so it doesn't prevent shutdown.
    """
    logger.info("Starting LLM Pro Finance API...")
    reload_requested = settings.force_model_reload
    if reload_requested:
        logger.info("Force model reload enabled (FORCE_MODEL_RELOAD=true)")
    logger.info("Initializing model in background thread...")

    def _model_loader() -> None:
        """Worker: import the provider lazily and load the model."""
        from app.providers.transformers_provider import initialize_model
        initialize_model(force_reload=reload_requested)

    # Daemon thread so an in-flight load never blocks process shutdown.
    threading.Thread(target=_model_loader, daemon=True).start()
    logger.info("Model initialization started in background")
@app.get("/")
async def root() -> Dict[str, str]:
"""Root endpoint returning API status and information.
Returns:
Dictionary containing API status, service name, version, model, and backend.
"""
return {
"status": "ok",
"service": "Qwen Open Finance R 8B Inference",
"version": __version__,
"model": settings.model,
"backend": "Transformers"
}
@app.get("/health")
async def health() -> Dict[str, str]:
"""Liveness check endpoint for monitoring and load balancers.
Returns:
Dictionary indicating the service is alive.
"""
return {"status": "service alive", "service": "LLM Pro Finance API"}
@app.get("/ready")
async def ready() -> JSONResponse:
"""Readiness check endpoint for orchestrators and load balancers.
Checks if the model is loaded and ready to handle requests.
Returns 503 Service Unavailable if the model is not ready.
Returns:
JSONResponse with ready/model_loaded fields and appropriate status code.
"""
from app.providers.transformers_provider import is_model_ready
model_loaded = is_model_ready()
ready_status = model_loaded
response_data = {
"ready": ready_status,
"model_loaded": model_loaded,
"service": "LLM Pro Finance API"
}
if ready_status:
return JSONResponse(content=response_data, status_code=status.HTTP_200_OK)
else:
return JSONResponse(
content=response_data,
status_code=status.HTTP_503_SERVICE_UNAVAILABLE
)
|