SorrowTea's picture
Upload 45 files
96abbd8 verified
"""
Lightweight HTTP client for OpenAI-compatible chat completions.
- Credentials are read from environment variables only.
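- Supported environment variables (for the base URL and for the key, the first one set to a non-empty value wins, in the order `LLM_*`, `OPENAI_*`, then the unprefixed name):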
* `LLM_API_BASE_URL`
* `LLM_API_KEY`
* `OPENAI_BASE_URL`
* `OPENAI_API_KEY`
* `API_URL`
* `API_KEY`
"""
from __future__ import annotations
import json
import os
import time
from typing import Dict, List
import requests
def _get_credentials() -> Dict[str, str]:
api_key = (
os.getenv("LLM_API_KEY")
or os.getenv("OPENAI_API_KEY")
or os.getenv("API_KEY")
)
base_url = (
os.getenv("LLM_API_BASE_URL")
or os.getenv("OPENAI_BASE_URL")
or os.getenv("API_URL")
)
if not api_key:
raise RuntimeError(
"Missing API key. Set one of: LLM_API_KEY, OPENAI_API_KEY, API_KEY."
)
if not base_url:
raise RuntimeError(
"Missing API base URL. Set one of: "
"LLM_API_BASE_URL, OPENAI_BASE_URL, API_URL."
)
return {"api_key": api_key, "base_url": base_url.rstrip("/")}
def _post_chat_completion(
    messages: List[Dict[str, str]],
    model: str,
    temperature: float,
    max_tokens: int,
    timeout: float = 120,
) -> Dict:
    """POST one chat-completion request and return the decoded JSON body.

    The request goes to ``<base_url>/chat/completions`` with a bearer token,
    both taken from ``_get_credentials()``.

    Args:
        messages: Chat messages, sent as the ``messages`` field of the payload.
        model: Model identifier, sent as the ``model`` field.
        temperature: Sampling temperature, sent as ``temperature``.
        max_tokens: Completion length limit, sent as ``max_tokens``.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        The response body decoded from JSON.

    Raises:
        RuntimeError: If credentials are missing, or the body is not JSON.
        requests.RequestException: On transport failures, or on an HTTP error
            status (via ``raise_for_status``).
    """
    creds = _get_credentials()
    url = f"{creds['base_url']}/chat/completions"
    headers = {
        "Authorization": f"Bearer {creds['api_key']}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    try:
        return response.json()
    # ValueError is the common base of every decode error ``Response.json()``
    # can raise: the stdlib json.JSONDecodeError, simplejson's own
    # JSONDecodeError (used by requests when simplejson is installed, and NOT
    # a subclass of the stdlib one), and requests.exceptions.JSONDecodeError.
    except ValueError as exc:  # pragma: no cover - defensive
        raise RuntimeError(f"Non-JSON response from LLM API: {response.text[:200]}") from exc
def _extract_content(result: Dict) -> str:
choices = result.get("choices")
if not choices:
raise RuntimeError(f"LLM API response missing 'choices': {result}")
message = choices[0].get("message") or {}
content = message.get("content")
if content is None:
raise RuntimeError(f"LLM API response missing message content: {result}")
return content
def get_response(
    prompt: str,
    model: str,
    temperature: float = 0.01,
    maximum_retries: int = 10,
    *,
    max_tokens: int = 16384,
    retry_delay: float = 2.0,
) -> str:
    """
    Send a chat completion request using OpenAI-compatible REST calls.

    The prompt is sent as a single ``user`` message. Any exception raised
    while posting the request or extracting the reply counts as a failed
    attempt and is retried after a fixed delay.

    Args:
        prompt: Text sent as the content of the single user message.
        model: Model name. Names starting with ``deepseek`` are rewritten
            (``-chat`` -> ``-v3``, ``-reasoner`` -> ``-r1``) before sending.
        temperature: Sampling temperature forwarded to the API.
        maximum_retries: Total number of attempts; values below 1 are
            treated as 1.
        max_tokens: Completion length limit forwarded to the API.
        retry_delay: Seconds to sleep between attempts; values <= 0 disable
            the pause.

    Returns:
        The message content of the first choice in the response.

    Raises:
        RuntimeError: If every attempt fails. The last underlying error is
            attached as ``__cause__``.
    """
    if model.startswith("deepseek"):
        # Presumably the versioned names are what the target endpoint
        # expects for DeepSeek models -- confirm against the deployment.
        real_model = model.replace("-chat", "-v3").replace("-reasoner", "-r1")
    else:
        real_model = model

    attempts = max(1, maximum_retries)
    last_error: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            result = _post_chat_completion(
                messages=[{"role": "user", "content": prompt}],
                model=real_model,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            return _extract_content(result)
        except Exception as exc:  # noqa: BLE001
            last_error = exc
            if attempt == attempts:
                # Out of attempts: skip the "Retrying..." notice and the sleep.
                break
            print(f"Error using API: {exc}. Retrying...")
            if retry_delay > 0:
                time.sleep(retry_delay)
    # Chain the last failure so its traceback is kept for debugging.
    raise RuntimeError(
        f"Failed to get response from API after retries: {last_error}"
    ) from last_error