import json
import os
from typing import AsyncGenerator

import httpx


class LLMClient:
    """Thin async client for calling an LLM backend (currently Ollama)."""

    def __init__(self, backend="ollama", url=None, api_key=None, model=None):
        self.backend = backend
        self.url = url or os.getenv("OLLAMA_URL", "http://localhost:11434")
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
        self.http = httpx.AsyncClient(timeout=30)
    async def simple_call(self, prompt: str, temperature: float = 0.0) -> str:
        """Send a single non-streaming prompt and return the full response text."""
        if self.backend == "ollama":
            if not self.url or not self.model:
                raise RuntimeError(
                    f"LLM not configured: url={self.url}, model={self.model}. "
                    f"Set OLLAMA_URL and OLLAMA_MODEL env vars."
                )
            try:
                # Ollama exposes non-streaming generation at /api/generate
                r = await self.http.post(
                    f"{self.url}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {"temperature": temperature},
                    },
                )
                r.raise_for_status()
                response_data = r.json()
                return response_data.get("response", "")
            except httpx.HTTPStatusError as e:
                if e.response.status_code == 404:
                    raise RuntimeError(
                        f"Ollama endpoint not found. Is Ollama running at {self.url}? "
                        f"Or does the model '{self.model}' exist? "
                        f"Try: ollama pull {self.model}"
                    )
                elif e.response.status_code == 400:
                    error_detail = e.response.json().get("error", "Unknown error")
                    raise RuntimeError(f"Ollama API error: {error_detail}")
                else:
                    raise RuntimeError(
                        f"Ollama API error: HTTP {e.response.status_code} - {e.response.text}"
                    )
            except httpx.ConnectError:
                raise RuntimeError(
                    f"Cannot connect to Ollama at {self.url}. "
                    f"Is Ollama running? Start it with: ollama serve"
                )
            except Exception as e:
                raise RuntimeError(f"LLM call failed: {str(e)}")
        raise RuntimeError(f"Unsupported backend: {self.backend}")
    async def stream_call(self, prompt: str, temperature: float = 0.0) -> AsyncGenerator[str, None]:
        """Stream the LLM response token by token."""
        if self.backend == "ollama":
            if not self.url or not self.model:
                raise RuntimeError(f"LLM not configured: url={self.url}, model={self.model}")
            try:
                # Use a longer timeout here, since a full generation can take a while.
                async with httpx.AsyncClient(timeout=300.0) as client:
                    async with client.stream(
                        "POST",
                        f"{self.url}/api/generate",
                        json={
                            "model": self.model,
                            "prompt": prompt,
                            "stream": True,
                            "options": {"temperature": temperature},
                        },
                    ) as response:
                        response.raise_for_status()
                        # Ollama streams one JSON object per line.
                        async for line in response.aiter_lines():
                            if not line:
                                continue
                            try:
                                data = json.loads(line)
                                token = data.get("response", "")
                                if token:
                                    yield token
                                # Stop once Ollama signals the generation is complete.
                                if data.get("done", False):
                                    break
                            except json.JSONDecodeError:
                                continue
            except httpx.ConnectError:
                raise RuntimeError(
                    f"Cannot connect to Ollama at {self.url}. "
                    f"Is Ollama running? Start it with: ollama serve"
                )
            except Exception as e:
                raise RuntimeError(f"LLM streaming failed: {str(e)}")
        else:
            raise RuntimeError("Streaming not supported for this backend")