llm-ready-data / app /api /v1 /chat.py
light-infer-chat's picture
ok
1f652e8
Raw
History Blame Contribute Delete
4.01 kB
from __future__ import annotations
import logging
import time
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException, Request
from app.api.deps import require_auth
from app.services.chat_service import chat_completion
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Model identifiers accepted in the request body's "model" field.
VALID_MODELS = ["agentdeck-1.0", "agentdeck-flash", "agentdeck-0.1"]
# Provider identifiers accepted in the request body's optional "provider" field.
VALID_PROVIDERS = ["openprovider", "meganova", "aionlabs"]
# Router on which this module's endpoints are registered.
router = APIRouter()
def _validate_messages(messages: Any) -> None:
    """Raise a 400 ``HTTPException`` unless *messages* is a non-empty list of chat messages.

    Each message must be a dict with string ``role`` and ``content`` fields,
    and ``role`` must be one of ``system``, ``user`` or ``assistant``.
    """
    if not messages or not isinstance(messages, list):
        raise HTTPException(status_code=400, detail="messages is required and must be a non-empty array")
    for msg in messages:
        if not isinstance(msg, dict):
            raise HTTPException(status_code=400, detail="Each message must be an object")
        if not isinstance(msg.get("role"), str) or not isinstance(msg.get("content"), str):
            raise HTTPException(status_code=400, detail="Each message must have string role and content fields")
        if msg["role"] not in ("system", "user", "assistant"):
            raise HTTPException(status_code=400, detail=f"Invalid role '{msg['role']}'. Must be system, user, or assistant")


def _validate_options(body: Dict[str, Any]) -> tuple[str, Optional[str], bool]:
    """Validate the ``model``, ``provider``, ``return_json`` and ``stream`` fields of *body*.

    Returns:
        ``(model, provider, stream)`` once every check has passed.

    Raises:
        HTTPException: 400 on the first field that fails validation. Fields
            are checked in the order model, provider, return_json, stream.
    """
    model: Any = body.get("model", "agentdeck-1.0")
    if not isinstance(model, str) or model not in VALID_MODELS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid model '{model}'. Must be one of: {', '.join(VALID_MODELS)}",
        )

    provider: Any = body.get("provider")
    if provider is not None:
        if not isinstance(provider, str) or provider not in VALID_PROVIDERS:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid provider '{provider}'",
            )

    return_json: Any = body.get("return_json")
    if return_json is not None and not isinstance(return_json, bool):
        raise HTTPException(status_code=400, detail="return_json must be a boolean")

    stream: Any = body.get("stream", False)
    if not isinstance(stream, bool):
        raise HTTPException(status_code=400, detail="stream must be a boolean")

    if stream and return_json:
        raise HTTPException(status_code=400, detail="stream and return_json cannot both be true")

    return model, provider, stream


@router.post("/chat/completions")
async def create_chat_completion(
    body: Dict[str, Any],
    request: Request,
    token: str = Depends(require_auth),
) -> Dict[str, Any]:
    """Validate a chat-completion request and delegate it to ``chat_completion``.

    Args:
        body: Parsed JSON request body. ``messages`` is required; ``model``,
            ``provider``, ``return_json``, ``stream``, ``max_tokens``,
            ``temperature``, ``top_p`` and ``response_format`` are optional.
        request: Incoming request; used to read the optional ``redis`` and
            ``scripts`` attributes from ``request.app.state``.
        token: Value produced by the ``require_auth`` dependency. Not read
            in this function.

    Returns:
        Whatever ``chat_completion`` returns.

    Raises:
        HTTPException: 400 when the body fails validation; 503 when the
            service raises a ``RuntimeError`` containing
            "All AI providers exhausted"; 502 for any other ``RuntimeError``.
    """
    start = time.monotonic()
    # Short correlation id for log lines: last 8 hex digits of the
    # current time in microseconds.
    req_id = hex(int(time.time() * 1_000_000))[-8:]

    messages: Optional[List[Dict[str, str]]] = body.get("messages")
    _validate_messages(messages)
    model, provider, stream = _validate_options(body)

    logger.info(
        "[%s] POST /chat/completions model=%s provider=%s messages=%d stream=%s",
        req_id, model, provider, len(messages), stream,
    )

    # NOTE(review): `stream` and `return_json` are validated above but are not
    # forwarded to chat_completion below - confirm this is intentional.
    # NOTE(review): max_tokens / temperature / top_p / response_format are
    # passed through without type or range checks - confirm the service
    # validates them.
    max_tokens = body.get("max_tokens", 1024)
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 0.9)
    response_format = body.get("response_format", None)

    redis = getattr(request.app.state, "redis", None)
    scripts = getattr(request.app.state, "scripts", None)

    # Keep the try body limited to the service call so only its RuntimeErrors
    # are translated into HTTP errors.
    try:
        result = await chat_completion(
            messages=messages,
            model=model,
            provider=provider,
            response_format=response_format,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            redis=redis,
            scripts=scripts,
        )
    except RuntimeError as e:
        elapsed = time.monotonic() - start
        if "All AI providers exhausted" in str(e):
            logger.warning("[%s] service_unavailable in %.0fms", req_id, elapsed * 1000)
            # Chain explicitly so the original error stays attached as __cause__.
            raise HTTPException(
                status_code=503,
                detail="All API keys are currently locked. Retry after a few minutes.",
            ) from e
        logger.warning("[%s] upstream_error in %.0fms: %s", req_id, elapsed * 1000, e)
        raise HTTPException(status_code=502, detail=str(e)) from e

    elapsed = time.monotonic() - start
    logger.info("[%s] success in %.0fms", req_id, elapsed * 1000)
    return result