| """ |
| Lightweight HTTP client for OpenAI-compatible chat completions. |
| |
| - Credentials are read from environment variables only. |
| - Supported environment variables: |
| * `LLM_API_BASE_URL` |
| * `LLM_API_KEY` |
| * `OPENAI_BASE_URL` |
| * `OPENAI_API_KEY` |
| * `API_URL` |
| * `API_KEY` |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| import os |
| import time |
| from typing import Dict, List |
|
|
| import requests |
|
|
| def _get_credentials() -> Dict[str, str]: |
| api_key = ( |
| os.getenv("LLM_API_KEY") |
| or os.getenv("OPENAI_API_KEY") |
| or os.getenv("API_KEY") |
| ) |
| base_url = ( |
| os.getenv("LLM_API_BASE_URL") |
| or os.getenv("OPENAI_BASE_URL") |
| or os.getenv("API_URL") |
| ) |
| if not api_key: |
| raise RuntimeError( |
| "Missing API key. Set one of: LLM_API_KEY, OPENAI_API_KEY, API_KEY." |
| ) |
| if not base_url: |
| raise RuntimeError( |
| "Missing API base URL. Set one of: " |
| "LLM_API_BASE_URL, OPENAI_BASE_URL, API_URL." |
| ) |
| return {"api_key": api_key, "base_url": base_url.rstrip("/")} |
|
|
|
|
| def _post_chat_completion( |
| messages: List[Dict[str, str]], |
| model: str, |
| temperature: float, |
| max_tokens: int, |
| ) -> Dict: |
| creds = _get_credentials() |
| url = f"{creds['base_url']}/chat/completions" |
| headers = { |
| "Authorization": f"Bearer {creds['api_key']}", |
| "Content-Type": "application/json", |
| } |
| payload = { |
| "model": model, |
| "messages": messages, |
| "temperature": temperature, |
| "max_tokens": max_tokens, |
| } |
| response = requests.post(url, headers=headers, json=payload, timeout=120) |
| response.raise_for_status() |
| try: |
| return response.json() |
| except json.JSONDecodeError as exc: |
| raise RuntimeError(f"Non-JSON response from LLM API: {response.text[:200]}") from exc |
|
|
|
|
| def _extract_content(result: Dict) -> str: |
| choices = result.get("choices") |
| if not choices: |
| raise RuntimeError(f"LLM API response missing 'choices': {result}") |
| message = choices[0].get("message") or {} |
| content = message.get("content") |
| if content is None: |
| raise RuntimeError(f"LLM API response missing message content: {result}") |
| return content |
|
|
|
|
| def get_response(prompt: str, model: str, temperature: float = 0.01, maximum_retries: int = 10) -> str: |
| """ |
| Send a chat completion request using OpenAI-compatible REST calls. |
| """ |
| if model.startswith("deepseek"): |
| real_model = model.replace("-chat", "-v3").replace("-reasoner", "-r1") |
| else: |
| real_model = model |
|
|
| attempts = max(1, maximum_retries) |
| last_error: Exception | None = None |
| while attempts > 0: |
| try: |
| result = _post_chat_completion( |
| messages=[{"role": "user", "content": prompt}], |
| model=real_model, |
| temperature=temperature, |
| max_tokens=16384, |
| ) |
| return _extract_content(result) |
| except Exception as exc: |
| last_error = exc |
| attempts -= 1 |
| if attempts == 0: |
| break |
| print(f"Error using API: {exc}. Retrying...") |
| time.sleep(2) |
|
|
| raise RuntimeError(f"Failed to get response from API after retries: {last_error}") |
|
|