# Source commit e05407d ("Add OpenAI-compatible cloud LLM client"), Babu Pallam.
# ============================================================
# FILE: src/llm_client.py
# ============================================================
# PURPOSE:
# Communicate with the cloud LLM API.
#
# This client is written for OpenAI-compatible chat completion APIs.
#
# CLōD is OpenAI-compatible, so the request format is:
#
# POST https://api.clod.io/v1/chat/completions
#
# Body:
# {
#     "model": "...",
#     "messages": [...],
#     "temperature": 0.2,
#     "max_completion_tokens": 700
# }
#
# A production LLM client should handle:
# - retries
# - timeout
# - rate limit errors
# - provider errors
# - latency tracking
# - raw response logging
# - safe secret handling
# ============================================================
import json
import time
from typing import Any, Dict, List
import requests
from src.config import AppConfig
class CloudLLMClient:
    """
    Client for OpenAI-compatible cloud chat completion APIs.

    Every connection setting (endpoint URL, auth header, model, temperature,
    token limit, timeout and retry parameters) is read from the ``config``
    object supplied to the constructor.
    """

    # HTTP status codes treated as transient; a response carrying one of
    # these is retried until the attempt budget is used up.
    RETRYABLE_STATUS_CODES = frozenset({408, 409, 425, 429, 500, 502, 503, 504})

    def __init__(self, config: "AppConfig", use_max_tokens_style: bool = False) -> None:
        """
        Store the configuration and the token-limit field style.

        use_max_tokens_style:
        - False uses max_completion_tokens
        - True uses max_tokens
        CLōD examples use max_completion_tokens.
        """
        self.config = config
        self.use_max_tokens_style = use_max_tokens_style

    def build_headers(self) -> Dict[str, str]:
        """
        Build the HTTP headers for a chat completion request.

        The auth header value is "<prefix> <api key>" when
        ``config.cloud_auth_prefix`` is truthy, otherwise the bare API key.

        Important:
        Do not print full headers in logs because headers contain secrets.
        """
        prefix = self.config.cloud_auth_prefix
        api_key = self.config.cloud_api_key
        auth_value = f"{prefix} {api_key}" if prefix else api_key
        return {
            self.config.cloud_auth_header: auth_value,
            "Content-Type": "application/json",
        }

    def build_payload(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Build the JSON request body for a chat completion.

        The token limit is sent as ``max_tokens`` or ``max_completion_tokens``
        depending on ``use_max_tokens_style``.
        """
        payload: Dict[str, Any] = {
            "model": self.config.cloud_chat_model,
            "messages": messages,
            "temperature": self.config.cloud_temperature,
        }
        if self.use_max_tokens_style:
            token_field = "max_tokens"
        else:
            token_field = "max_completion_tokens"
        payload[token_field] = self.config.cloud_max_completion_tokens
        return payload

    @staticmethod
    def extract_answer(response_json: Dict[str, Any]) -> str:
        """
        Extract the answer text from an OpenAI-compatible response.

        Expected format:
        response_json["choices"][0]["message"]["content"]

        Returns the content when it is a string. If the response does not
        have the expected shape, or the content is not a string (for example
        null), the whole response is returned as pretty-printed JSON so the
        caller always receives a ``str``.
        """
        try:
            content = response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Missing key, empty "choices", or a level that is not indexable.
            content = None
        if isinstance(content, str):
            return content
        return json.dumps(response_json, indent=2)

    def chat(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Send chat messages to the cloud LLM.

        Returns structured output for debugging and logging, with keys:
        "answer", "raw_response", "request_payload", "status_code",
        "elapsed_seconds" (total time across all attempts, rounded to
        milliseconds) and "attempts".

        Raises:
            RuntimeError: immediately for an HTTP error status that is not in
                RETRYABLE_STATUS_CODES, or once every attempt has failed (the
                last underlying error is attached as ``__cause__``).
        """
        payload = self.build_payload(messages)
        headers = self.build_headers()
        # A configured budget below 1 would skip the request entirely and
        # report "Last error: None"; always make at least one attempt.
        total_attempts = max(1, self.config.cloud_max_retries)
        last_error = None
        # perf_counter is monotonic, so the latency figure cannot be skewed
        # by system clock adjustments.
        started = time.perf_counter()

        for attempt in range(1, total_attempts + 1):
            try:
                response = requests.post(
                    self.config.cloud_chat_completions_url,
                    headers=headers,
                    json=payload,
                    timeout=self.config.cloud_timeout_seconds,
                )
            except requests.exceptions.RequestException as error:
                # Transport-level failure (connection, timeout, ...): retry.
                last_error = error
            else:
                try:
                    response_json = response.json()
                except ValueError:
                    # Body is not valid JSON; keep the raw text for debugging.
                    response_json = {
                        "raw_text": response.text,
                    }

                if response.status_code < 400:
                    elapsed_seconds = round(time.perf_counter() - started, 3)
                    return {
                        "answer": self.extract_answer(response_json),
                        "raw_response": response_json,
                        "request_payload": payload,
                        "status_code": response.status_code,
                        "elapsed_seconds": elapsed_seconds,
                        "attempts": attempt,
                    }

                error_message = (
                    "Cloud API error. "
                    f"Status: {response.status_code}. "
                    f"Response: {json.dumps(response_json, indent=2)}"
                )
                if response.status_code not in self.RETRYABLE_STATUS_CODES:
                    # Retrying cannot fix this status, so fail right away.
                    raise RuntimeError(error_message)
                last_error = RuntimeError(error_message)

            if attempt < total_attempts:
                # Linear backoff: the wait grows with the attempt number.
                sleep_time = self.config.cloud_retry_sleep_seconds * attempt
                print(f"Attempt {attempt} failed. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)

        raise RuntimeError(
            f"Cloud LLM call failed after {total_attempts} attempts. "
            f"Last error: {last_error}"
        ) from last_error

    def test_connection(self) -> str:
        """
        Simple API test.

        Sends a fixed two-message conversation through ``chat`` and returns
        the extracted answer. This verifies:
        - API key
        - base URL
        - model name
        - provider compatibility
        """
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": "Reply with one short sentence: API connection works.",
            },
        ]
        result = self.chat(messages)
        return result["answer"]