| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import json |
| import time |
| from typing import Any, Dict, List |
|
|
| import requests |
|
|
| from src.config import AppConfig |
|
|
|
|
class CloudLLMClient:
    """
    Client for OpenAI-compatible cloud chat completion APIs.

    All connection settings (URL, model, credentials, timeout, retry policy)
    are read from the ``config`` object passed to the constructor.
    """

    # HTTP status codes treated as transient, i.e. worth another attempt.
    _RETRYABLE_STATUS_CODES = frozenset({408, 409, 425, 429, 500, 502, 503, 504})

    def __init__(self, config: "AppConfig", use_max_tokens_style: bool = False) -> None:
        """
        Store the configuration and the token-limit parameter style.

        use_max_tokens_style:
        - False uses max_completion_tokens
        - True uses max_tokens

        CLōD examples use max_completion_tokens.
        """
        self.config = config
        self.use_max_tokens_style = use_max_tokens_style

    def build_headers(self) -> Dict[str, str]:
        """
        Build HTTP headers.

        The auth header value is ``"<prefix> <api key>"`` when
        ``config.cloud_auth_prefix`` is truthy, otherwise the bare API key.

        Important:
        Do not print full headers in logs because headers contain secrets.
        """
        if self.config.cloud_auth_prefix:
            auth_value = f"{self.config.cloud_auth_prefix} {self.config.cloud_api_key}"
        else:
            auth_value = self.config.cloud_api_key

        return {
            self.config.cloud_auth_header: auth_value,
            "Content-Type": "application/json",
        }

    def build_payload(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Build request payload for chat completion.

        The token limit is sent as ``max_tokens`` or ``max_completion_tokens``
        depending on ``use_max_tokens_style``; the value is the same in both
        cases (``config.cloud_max_completion_tokens``).
        """
        payload: Dict[str, Any] = {
            "model": self.config.cloud_chat_model,
            "messages": messages,
            "temperature": self.config.cloud_temperature,
        }

        limit_key = "max_tokens" if self.use_max_tokens_style else "max_completion_tokens"
        payload[limit_key] = self.config.cloud_max_completion_tokens

        return payload

    @staticmethod
    def extract_answer(response_json: Dict[str, Any]) -> str:
        """
        Extract answer text from OpenAI-compatible response.

        Expected format:
        response_json["choices"][0]["message"]["content"]

        Always returns a string: if the expected path is missing, or the
        content is not a string (for example ``null``), the whole response is
        returned as pretty-printed JSON so the caller can inspect it.
        """
        try:
            content = response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Response does not have the expected shape.
            content = None

        if isinstance(content, str):
            return content

        return json.dumps(response_json, indent=2)

    def chat(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Send chat messages to the cloud LLM.

        Returns structured output for debugging and logging, with the keys
        ``answer``, ``raw_response``, ``request_payload``, ``status_code``,
        ``elapsed_seconds`` and ``attempts``.

        Raises:
            RuntimeError: immediately on a non-retryable HTTP error status,
                or after all attempts failed (chained to the last error).
        """
        payload = self.build_payload(messages)
        headers = self.build_headers()

        # Always make at least one attempt; a zero or negative setting would
        # otherwise skip the request entirely and report "Last error: None".
        total_attempts = max(1, self.config.cloud_max_retries)

        last_error = None
        # Monotonic clock: elapsed time is unaffected by wall-clock changes.
        started = time.monotonic()

        for attempt in range(1, total_attempts + 1):
            try:
                response = requests.post(
                    self.config.cloud_chat_completions_url,
                    headers=headers,
                    json=payload,
                    timeout=self.config.cloud_timeout_seconds,
                )
            except requests.exceptions.RequestException as error:
                # Transport-level failure (connection, timeout, ...): retry.
                last_error = error
            else:
                try:
                    response_json = response.json()
                except ValueError:
                    # Body is not valid JSON; keep the raw text for diagnosis.
                    response_json = {
                        "raw_text": response.text,
                    }

                if response.status_code < 400:
                    return {
                        "answer": self.extract_answer(response_json),
                        "raw_response": response_json,
                        "request_payload": payload,
                        "status_code": response.status_code,
                        "elapsed_seconds": round(time.monotonic() - started, 3),
                        "attempts": attempt,
                    }

                failure = RuntimeError(
                    f"Cloud API error. "
                    f"Status: {response.status_code}. "
                    f"Response: {json.dumps(response_json, indent=2)}"
                )

                # Client-side errors (bad key, bad model, ...) will not fix
                # themselves, so fail fast instead of retrying.
                if response.status_code not in self._RETRYABLE_STATUS_CODES:
                    raise failure

                last_error = failure

            if attempt < total_attempts:
                # Linear backoff: wait longer after each failed attempt.
                sleep_time = self.config.cloud_retry_sleep_seconds * attempt
                print(f"Attempt {attempt} failed. Retrying in {sleep_time} seconds...")
                time.sleep(sleep_time)

        raise RuntimeError(
            f"Cloud LLM call failed after {total_attempts} attempts. "
            f"Last error: {last_error}"
        ) from last_error

    def test_connection(self) -> str:
        """
        Simple API test.

        Sends one short fixed prompt through ``chat`` and returns the answer
        text. A successful call verifies:
        - API key
        - base URL
        - model name
        - provider compatibility
        """
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant.",
            },
            {
                "role": "user",
                "content": "Reply with one short sentence: API connection works.",
            },
        ]

        result = self.chat(messages)
        return result["answer"]