|
|
from __future__ import annotations
|
|
|
|
|
|
import asyncio
|
|
|
import json
|
|
|
|
|
|
import httpx
|
|
|
from fastapi import FastAPI
|
|
|
|
|
|
from .logging import logger
|
|
|
|
|
|
from .config import BRIDGE_BASE_URL, WARMUP_INIT_RETRIES, WARMUP_INIT_DELAY_S
|
|
|
from .bridge import initialize_once
|
|
|
from .router import router
|
|
|
|
|
|
|
|
|
# FastAPI application exposing an OpenAI-compatible, streaming chat-completions
# API backed by the Warp bridge service.
app = FastAPI(title="OpenAI Chat Completions (Warp bridge) - Streaming")

# All endpoints are defined in .router; presumably /healthz, /v1/models and
# /v1/chat/completions (per the startup log message) — verify against router.
app.include_router(router)
|
|
|
|
|
|
|
|
|
# NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
# favor of lifespan handlers — migrate when convenient.
@app.on_event("startup")
async def _on_startup():
    """Wait for the bridge server to become healthy, then warm up the bridge.

    Polls ``{BRIDGE_BASE_URL}/healthz`` up to ``WARMUP_INIT_RETRIES`` times,
    sleeping ``WARMUP_INIT_DELAY_S`` seconds between attempts, then runs
    ``initialize_once`` in a worker thread. Every failure is logged but never
    aborts startup — the warmup is strictly best-effort.
    """
    logger.info("[OpenAI Compat] Server starting. BRIDGE_BASE_URL=%s", BRIDGE_BASE_URL)
    logger.info("[OpenAI Compat] Endpoints: GET /healthz, GET /v1/models, POST /v1/chat/completions")

    url = f"{BRIDGE_BASE_URL}/healthz"
    retries = WARMUP_INIT_RETRIES
    delay_s = WARMUP_INIT_DELAY_S
    for attempt in range(1, retries + 1):
        try:
            async with httpx.AsyncClient(timeout=5.0, trust_env=True) as client:
                resp = await client.get(url)
                if resp.status_code == 200:
                    logger.info("[OpenAI Compat] Bridge server is ready at %s", url)
                    break
                else:
                    logger.warning("[OpenAI Compat] Bridge health at %s -> HTTP %s", url, resp.status_code)
        except Exception as e:
            # Connection errors are expected while the bridge is still starting.
            logger.warning("[OpenAI Compat] Bridge health attempt %s/%s failed: %s", attempt, retries, e)
        # Only sleep between attempts — the original slept after the final
        # attempt too, delaying startup for nothing.
        if attempt < retries:
            await asyncio.sleep(delay_s)
    else:
        # for/else: the loop exhausted all retries without a successful break.
        logger.error("[OpenAI Compat] Bridge server not ready at %s", url)

    # initialize_once is blocking, so run it off the event loop.
    try:
        await asyncio.to_thread(initialize_once)
    except Exception as e:
        # Lazy %-style args for consistency with the other log calls here.
        logger.warning("[OpenAI Compat] Warmup initialize_once on startup failed: %s", e)