# ============================================================
# FILE: src/llm_client.py
# ============================================================
# PURPOSE:
# Communicate with the cloud LLM API.
#
# This client is written for OpenAI-compatible chat completion APIs.
#
# CLōD is OpenAI-compatible, so the request format is:
#
# POST https://api.clod.io/v1/chat/completions
#
# Body:
# {
#   "model": "...",
#   "messages": [...],
#   "temperature": 0.2,
#   "max_completion_tokens": 700
# }
#
# A production LLM client should handle:
# - retries
# - timeout
# - rate limit errors
# - provider errors
# - latency tracking
# - raw response logging
# - safe secret handling
# ============================================================

import json
import time
from typing import Any, Dict, List

import requests

from src.config import AppConfig


class CloudLLMClient:
    """
    Client for OpenAI-compatible cloud chat completion APIs.

    All connection settings (URL, model, auth header, timeout, retry
    policy) are read from the AppConfig instance passed to the constructor.
    """

    # HTTP status codes for which chat() retries instead of failing
    # immediately.
    _RETRYABLE_STATUS_CODES = frozenset({408, 409, 425, 429, 500, 502, 503, 504})

    def __init__(self, config: AppConfig, use_max_tokens_style: bool = False) -> None:
        """
        Store the configuration and the token-limit parameter style.

        use_max_tokens_style:
        - False uses max_completion_tokens
        - True uses max_tokens

        CLōD examples use max_completion_tokens.
        """
        self.config = config
        self.use_max_tokens_style = use_max_tokens_style

    def build_headers(self) -> Dict[str, str]:
        """
        Build HTTP headers.

        The auth header value is "<prefix> <key>" when
        config.cloud_auth_prefix is set, otherwise the bare key.

        Important:
        Do not print full headers in logs because headers contain secrets.
        """
        if self.config.cloud_auth_prefix:
            auth_value = f"{self.config.cloud_auth_prefix} {self.config.cloud_api_key}"
        else:
            auth_value = self.config.cloud_api_key

        return {
            self.config.cloud_auth_header: auth_value,
            "Content-Type": "application/json",
        }

    def build_payload(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Build request payload for chat completion.

        The token limit is sent as "max_tokens" or "max_completion_tokens"
        depending on use_max_tokens_style; the value always comes from
        config.cloud_max_completion_tokens.
        """
        payload = {
            "model": self.config.cloud_chat_model,
            "messages": messages,
            "temperature": self.config.cloud_temperature,
        }

        if self.use_max_tokens_style:
            token_limit_key = "max_tokens"
        else:
            token_limit_key = "max_completion_tokens"
        payload[token_limit_key] = self.config.cloud_max_completion_tokens

        return payload

    @staticmethod
    def extract_answer(response_json: Dict[str, Any]) -> str:
        """
        Extract answer text from OpenAI-compatible response.

        Expected format:
        response_json["choices"][0]["message"]["content"]

        Returns the content string when it is present. If the response does
        not have the expected shape, or the content is not a string (for
        example null), the whole response is returned as pretty-printed
        JSON so that the caller always receives a str.
        """
        try:
            content = response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Missing key, empty "choices", or an unexpected container type.
            content = None

        if isinstance(content, str):
            return content

        return json.dumps(response_json, indent=2)

    @staticmethod
    def _parse_response_body(response: "requests.Response") -> Dict[str, Any]:
        """Return the decoded JSON body, or {"raw_text": ...} if it is not JSON."""
        try:
            return response.json()
        except ValueError:
            # JSON decode errors raised by response.json() derive from
            # ValueError; keep the raw text so it still shows up in errors.
            return {
                "raw_text": response.text,
            }

    def chat(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Send chat messages to the cloud LLM.

        Returns structured output for debugging and logging, with keys:
        "answer", "raw_response", "request_payload", "status_code",
        "elapsed_seconds" (total time across all attempts, including
        retry sleeps) and "attempts".

        Raises:
            RuntimeError: immediately for an HTTP status >= 400 that is not
                retryable, or after the final attempt when every attempt
                failed. In the latter case the last underlying error is
                attached as the exception cause.
        """
        payload = self.build_payload(messages)
        headers = self.build_headers()

        # Always try at least once, even if the configured value is 0 or
        # negative; otherwise no request would ever be sent.
        max_attempts = max(1, self.config.cloud_max_retries)

        last_error = None
        # Monotonic clock: unaffected by wall-clock adjustments.
        start_time = time.monotonic()

        for attempt in range(1, max_attempts + 1):
            try:
                response = requests.post(
                    self.config.cloud_chat_completions_url,
                    headers=headers,
                    json=payload,
                    timeout=self.config.cloud_timeout_seconds,
                )
            except requests.exceptions.RequestException as error:
                # Transport-level failure (as reported by requests): retry.
                last_error = error
            else:
                response_json = self._parse_response_body(response)

                if response.status_code < 400:
                    elapsed_seconds = round(time.monotonic() - start_time, 3)

                    return {
                        "answer": self.extract_answer(response_json),
                        "raw_response": response_json,
                        "request_payload": payload,
                        "status_code": response.status_code,
                        "elapsed_seconds": elapsed_seconds,
                        "attempts": attempt,
                    }

                error_message = (
                    f"Cloud API error. "
                    f"Status: {response.status_code}. "
                    f"Response: {json.dumps(response_json, indent=2)}"
                )

                if response.status_code not in self._RETRYABLE_STATUS_CODES:
                    # Not in the retryable set: fail without retrying.
                    raise RuntimeError(error_message)

                last_error = RuntimeError(error_message)

            if attempt < max_attempts:
                # Linear backoff: the wait grows with the attempt number.
                sleep_time = self.config.cloud_retry_sleep_seconds * attempt
                print(f"Attempt {attempt} failed. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)

        raise RuntimeError(
            f"Cloud LLM call failed after {max_attempts} attempts. "
            f"Last error: {last_error}"
        ) from last_error

    def test_connection(self) -> str:
        """
        Simple API test.

        Sends a fixed two-message conversation through chat() and returns
        the extracted answer text.

        This verifies:
        - API key
        - base URL
        - model name
        - provider compatibility
        """
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": "Reply with one short sentence: API connection works.",
            },
        ]

        result = self.chat(messages)

        return result["answer"]