# Lancer — app/agents/llm_client.py
# (Hugging Face upload metadata: Madras1, "Upload 36 files", commit 7fbd9ac, verified)
"""LLM client abstraction for multiple providers.
Supports Groq and OpenRouter for LLM inference.
"""
import httpx
import json
from typing import Optional, AsyncIterator
import asyncio
from tenacity import (
retry,
stop_after_attempt,
wait_exponential,
retry_if_exception_type,
)
from app.config import get_settings
class RetryableError(Exception):
    """Transient provider failure that should trigger a retry.

    Raised for timeouts and retryable HTTP statuses (429/502/503/504);
    used as the predicate for tenacity's ``retry_if_exception_type``.
    """
async def generate_completion(
    messages: list[dict],
    model: Optional[str] = None,
    temperature: float = 0.3,
    max_tokens: int = 2048,
) -> str:
    """Generate a chat completion via the configured LLM provider.

    Args:
        messages: Chat messages in OpenAI format ({"role", "content"}).
        model: Model identifier; falls back to ``settings.llm_model``.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.

    Returns:
        The assistant message content.

    Raises:
        ValueError: If ``settings.llm_provider`` is not a known provider.
    """
    settings = get_settings()
    chosen_model = model or settings.llm_model
    # Dispatch table keeps provider routing in one place.
    providers = {
        "groq": _call_groq,
        "openrouter": _call_openrouter,
    }
    handler = providers.get(settings.llm_provider)
    if handler is None:
        raise ValueError(f"Unknown LLM provider: {settings.llm_provider}")
    return await handler(messages, chosen_model, temperature, max_tokens)
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    retry=retry_if_exception_type(RetryableError),
    reraise=True,
)
async def _call_groq(
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Call the Groq chat-completions API with retry on transient failures.

    Args:
        messages: Chat messages in OpenAI format ({"role", "content"}).
        model: Model identifier to request.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.

    Returns:
        The assistant message content of the first choice.

    Raises:
        ValueError: If GROQ_API_KEY is not configured.
        RetryableError: On timeout or retryable HTTP status
            (429/502/503/504), re-raised after retries are exhausted.
        httpx.HTTPStatusError: On other non-2xx responses.
    """
    settings = get_settings()
    if not settings.groq_api_key:
        raise ValueError("GROQ_API_KEY not configured")
    # Keep the try body limited to the network call so only transport
    # errors are translated into RetryableError.
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                "https://api.groq.com/openai/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {settings.groq_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": model,
                    "messages": messages,
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                },
            )
    except httpx.TimeoutException as e:
        # Chain the cause so the original timeout is visible in tracebacks.
        raise RetryableError(f"Groq timeout: {e}") from e
    # Rate limits and upstream/server errors are transient — retry them.
    if response.status_code in (429, 502, 503, 504):
        raise RetryableError(f"Groq error {response.status_code}")
    response.raise_for_status()
    data = response.json()
    return data["choices"][0]["message"]["content"]
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    retry=retry_if_exception_type(RetryableError),
    reraise=True,
)
async def _call_openrouter(
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Call the OpenRouter chat-completions API with retry on transient failures.

    Args:
        messages: Chat messages in OpenAI format ({"role", "content"}).
        model: Model identifier to request.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.

    Returns:
        The assistant message content of the first choice.

    Raises:
        ValueError: If OPENROUTER_API_KEY is not configured, or on a
            non-200 response that is not retryable.
        RetryableError: On timeout or retryable HTTP status
            (429/502/503/504), re-raised after retries are exhausted.
    """
    settings = get_settings()
    if not settings.openrouter_api_key:
        raise ValueError("OPENROUTER_API_KEY not configured")
    headers = {
        "Authorization": f"Bearer {settings.openrouter_api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://madras1-lancer.hf.space",
        "X-Title": "Lancer Search API",
    }
    # Fix: temperature and max_tokens were previously omitted from the
    # payload, so caller-supplied values were silently ignored (the Groq
    # path always sent them).
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            # json= lets httpx serialize the body (equivalent to the
            # previous manual json.dumps with a JSON content type).
            response = await client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers=headers,
                json=payload,
            )
    except httpx.TimeoutException as e:
        # Chain the cause so the original timeout is visible in tracebacks.
        raise RetryableError(f"OpenRouter timeout: {e}") from e
    # Rate limits and upstream/server errors are transient — retry them.
    if response.status_code in (429, 502, 503, 504):
        raise RetryableError(f"OpenRouter error {response.status_code}")
    if response.status_code != 200:
        error_text = response.text
        raise ValueError(f"OpenRouter error {response.status_code}: {error_text}")
    data = response.json()
    return data["choices"][0]["message"]["content"]
async def generate_completion_stream(
    messages: list[dict],
    model: Optional[str] = None,
    temperature: float = 0.3,
    max_tokens: int = 2048,
) -> AsyncIterator[str]:
    """Stream a chat completion from OpenRouter, yielding content deltas.

    Args:
        messages: Chat messages in OpenAI format ({"role", "content"}).
        model: Model identifier; falls back to ``settings.llm_model``.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate.

    Yields:
        Non-empty content fragments as they arrive over SSE.

    Raises:
        ValueError: If OPENROUTER_API_KEY is not configured, or on a
            non-200 response.
    """
    settings = get_settings()
    model = model or settings.llm_model
    if not settings.openrouter_api_key:
        raise ValueError("OPENROUTER_API_KEY not configured")
    headers = {
        "Authorization": f"Bearer {settings.openrouter_api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://madras1-lancer.hf.space",
        "X-Title": "Lancer Search API",
    }
    # Fix: temperature and max_tokens were previously omitted from the
    # payload, so caller-supplied values were silently ignored.
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": True,
    }
    async with httpx.AsyncClient(timeout=120.0) as client:
        async with client.stream(
            "POST",
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
        ) as response:
            if response.status_code != 200:
                # Body must be read explicitly in streaming mode.
                error_text = await response.aread()
                raise ValueError(f"OpenRouter streaming error {response.status_code}: {error_text}")
            async for line in response.aiter_lines():
                # SSE data lines are prefixed with "data: "; skip the rest.
                if not line.startswith("data: "):
                    continue
                data_str = line[6:]
                if data_str.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(data_str)
                except json.JSONDecodeError:
                    # Ignore malformed or partial SSE lines.
                    continue
                delta = chunk.get("choices", [{}])[0].get("delta", {})
                content = delta.get("content", "")
                if content:
                    yield content