import os

import httpx


class LLMClient:
    def __init__(self, backend="ollama", url=None, api_key=None, model=None):
        self.backend = backend
        self.url = url or os.getenv("OLLAMA_URL", "http://localhost:11434")
        # Stored for non-Ollama backends (e.g. Groq); not used by the Ollama path.
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
        self.http = httpx.AsyncClient(timeout=30)

    async def simple_call(self, prompt: str, temperature: float = 0.0) -> str:
        if self.backend == "ollama":
            if not self.url or not self.model:
                raise RuntimeError(
                    f"LLM not configured: url={self.url}, model={self.model}. "
                    "Set OLLAMA_URL and OLLAMA_MODEL env vars."
                )
            try:
                # Ollama exposes single-shot completions at /api/generate.
                r = await self.http.post(
                    f"{self.url}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {"temperature": temperature},
                    },
                )
                r.raise_for_status()
                response_data = r.json()
                return response_data.get("response", "")
            except httpx.HTTPStatusError as e:
                if e.response.status_code == 404:
                    raise RuntimeError(
                        f"Ollama endpoint not found. Is Ollama running at {self.url}? "
                        f"Or does the model '{self.model}' exist? "
                        f"Try: ollama pull {self.model}"
                    ) from e
                elif e.response.status_code == 400:
                    error_detail = e.response.json().get("error", "Unknown error")
                    raise RuntimeError(f"Ollama API error: {error_detail}") from e
                else:
                    raise RuntimeError(
                        f"Ollama API error: HTTP {e.response.status_code} - {e.response.text}"
                    ) from e
            except httpx.ConnectError as e:
                raise RuntimeError(
                    f"Cannot connect to Ollama at {self.url}. "
                    f"Is Ollama running? Start it with: ollama serve"
                ) from e
            except Exception as e:
                raise RuntimeError(f"LLM call failed: {str(e)}") from e
        raise RuntimeError(f"Unsupported backend: {self.backend}")
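
# --- Usage sketch (illustrative, not part of the client itself) ---
# Assumes a local Ollama server is reachable at OLLAMA_URL (default
# http://localhost:11434) and that the model named by OLLAMA_MODEL (default
# llama3.1:latest) has already been pulled; adjust env vars or constructor
# arguments for a different setup.
import asyncio


async def _demo() -> None:
    client = LLMClient(backend="ollama")
    try:
        # simple_call returns the model's text response as a plain string.
        answer = await client.simple_call("Reply with the single word: pong")
        print(answer)
    finally:
        # Close the underlying httpx.AsyncClient to release its connection pool.
        await client.http.aclose()


if __name__ == "__main__":
    asyncio.run(_demo())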