import os

import httpx


class LLMClient:
    """Thin async wrapper around a local Ollama server.

    A Groq backend is hinted at (GROQ_API_KEY is read) but not implemented:
    only the "ollama" branch of simple_call does anything.
    """

    def __init__(self, backend="ollama", url=None, api_key=None, model=None):
        self.backend = backend
        self.url = url or os.getenv("OLLAMA_URL", "http://localhost:11434")
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
        self.http = httpx.AsyncClient(timeout=30)

    async def simple_call(self, prompt: str, temperature: float = 0.0) -> str:
        if self.backend == "ollama":
            if not self.url or not self.model:
                raise RuntimeError(
                    f"LLM not configured: url={self.url}, model={self.model}. "
                    f"Set OLLAMA_URL and OLLAMA_MODEL env vars."
                )
            try:
                # Ollama uses the /api/generate endpoint
                r = await self.http.post(
                    f"{self.url}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {"temperature": temperature},
                    },
                )
                r.raise_for_status()
                response_data = r.json()
                return response_data.get("response", "")
            except httpx.HTTPStatusError as e:
                if e.response.status_code == 404:
                    raise RuntimeError(
                        f"Ollama endpoint not found. Is Ollama running at {self.url}? "
                        f"Or does the model '{self.model}' exist? "
                        f"Try: ollama pull {self.model}"
                    ) from e
                elif e.response.status_code == 400:
                    error_detail = e.response.json().get("error", "Unknown error")
                    raise RuntimeError(f"Ollama API error: {error_detail}") from e
                else:
                    raise RuntimeError(
                        f"Ollama API error: HTTP {e.response.status_code} - {e.response.text}"
                    ) from e
            except httpx.ConnectError as e:
                raise RuntimeError(
                    f"Cannot connect to Ollama at {self.url}. "
                    f"Is Ollama running? Start it with: ollama serve"
                ) from e
            except Exception as e:
                raise RuntimeError(f"LLM call failed: {e}") from e
        raise RuntimeError(f"Unsupported backend: {self.backend}")