# Source: Hugging Face file view (commit 0452a50, "Add Docker support and remove Ollama", 4.72 kB)
import os, json
import httpx
from typing import AsyncGenerator
class LLMClient:
    """Async client for the Groq chat-completions API (OpenAI-compatible).

    Configuration comes from the constructor or, as a fallback, the
    GROQ_API_KEY and GROQ_MODEL environment variables.
    """

    # OpenAI-compatible chat-completions endpoint used by both call styles.
    _API_URL = "https://api.groq.com/openai/v1/chat/completions"

    def __init__(self, api_key=None, model=None):
        """Create a client.

        Args:
            api_key: Groq API key; defaults to the GROQ_API_KEY env var.
            model: Model name; defaults to GROQ_MODEL, then "llama-3.1-8b-instant".
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        self.model = model or os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
        # Shared client for non-streaming calls; release it via aclose().
        self.http = httpx.AsyncClient(timeout=30)

    async def aclose(self) -> None:
        """Close the underlying HTTP connection pool.

        The original never closed self.http, leaking the pool; call this
        when the client is no longer needed.
        """
        await self.http.aclose()

    def _require_config(self) -> None:
        """Raise RuntimeError if the API key or model is not configured."""
        if not self.api_key:
            raise RuntimeError(
                "Groq API key not configured. Set GROQ_API_KEY environment variable. "
                "Get a free API key at https://console.groq.com"
            )
        if not self.model:
            raise RuntimeError("Groq model not configured. Set GROQ_MODEL environment variable.")

    def _headers(self) -> dict:
        """Auth + content-type headers sent with every request."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _payload(self, prompt: str, temperature: float, stream: bool) -> dict:
        """Chat-completions request body for a single-turn user prompt."""
        return {
            "model": self.model,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature,
            "stream": stream,
        }

    @staticmethod
    def _extract_error(response) -> str:
        """Best-effort human-readable error detail from an error response.

        The response body must already be read (stream_call reads it before
        raising so this works on streamed responses too).
        """
        try:
            error_json = response.json()
            return error_json.get("error", {}).get("message", str(error_json))
        except Exception:
            # Any parse failure falls back to the raw body text.
            return response.text

    async def simple_call(self, prompt: str, temperature: float = 0.0) -> str:
        """Send one user prompt and return the assistant's full reply.

        Args:
            prompt: The user message content.
            temperature: Sampling temperature (0.0 = deterministic-ish).

        Raises:
            RuntimeError: if configuration is missing or the API call fails.
        """
        self._require_config()
        try:
            # Groq uses OpenAI-compatible API
            r = await self.http.post(
                self._API_URL,
                headers=self._headers(),
                json=self._payload(prompt, temperature, stream=False),
            )
            r.raise_for_status()
            response_data = r.json()
            return response_data["choices"][0]["message"]["content"]
        except httpx.HTTPStatusError as e:
            error_detail = self._extract_error(e.response)
            # `from e` preserves the original traceback for debugging.
            raise RuntimeError(
                f"Groq API error: HTTP {e.response.status_code} - {error_detail}"
            ) from e
        except Exception as e:
            raise RuntimeError(f"Groq API call failed: {str(e)}") from e

    async def stream_call(self, prompt: str, temperature: float = 0.0) -> AsyncGenerator[str, None]:
        """Stream LLM response token by token.

        Yields:
            Each non-empty content delta from the Server-Sent Events stream.

        Raises:
            RuntimeError: if configuration is missing or the API call fails.
        """
        self._require_config()
        try:
            # Dedicated client with a long timeout: streamed generations can
            # run far longer than the 30 s budget of self.http.
            async with httpx.AsyncClient(timeout=300.0) as client:
                async with client.stream(
                    "POST",
                    self._API_URL,
                    headers=self._headers(),
                    json=self._payload(prompt, temperature, stream=True),
                ) as response:
                    if response.is_error:
                        # Bug fix: a streamed body is lazy, so the original
                        # handler's .json()/.text raised httpx.ResponseNotRead.
                        # Read the body while the stream is still open so the
                        # handler below can report the real error detail.
                        await response.aread()
                    response.raise_for_status()
                    async for line in response.aiter_lines():
                        # Groq uses Server-Sent Events format: "data: {json}".
                        if not line or not line.startswith("data: "):
                            continue
                        data_str = line[6:]  # Remove "data: " prefix
                        if data_str.strip() == "[DONE]":
                            break
                        try:
                            data = json.loads(data_str)
                            delta = data.get("choices", [{}])[0].get("delta", {})
                            token = delta.get("content", "")
                            if token:
                                yield token
                        except json.JSONDecodeError:
                            # Skip keep-alives / malformed lines.
                            continue
        except httpx.HTTPStatusError as e:
            error_detail = self._extract_error(e.response)
            raise RuntimeError(
                f"Groq API streaming error: HTTP {e.response.status_code} - {error_detail}"
            ) from e
        except Exception as e:
            raise RuntimeError(f"Groq API streaming failed: {str(e)}") from e