import json
import os
from typing import AsyncGenerator

import httpx


class LLMClient:
    """Thin async client for calling an LLM backend (currently Ollama)."""

    def __init__(self, backend="ollama", url=None, api_key=None, model=None):
        self.backend = backend
        self.url = url or os.getenv("OLLAMA_URL", "http://localhost:11434")
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
        self.http = httpx.AsyncClient(timeout=30)
    async def simple_call(self, prompt: str, temperature: float = 0.0) -> str:
        """Send a single non-streaming prompt and return the full response text."""
        if self.backend == "ollama":
            if not self.url or not self.model:
                raise RuntimeError(
                    f"LLM not configured: url={self.url}, model={self.model}. "
                    f"Set OLLAMA_URL and OLLAMA_MODEL env vars."
                )
            try:
                # Ollama exposes non-streaming generation at /api/generate
                r = await self.http.post(
                    f"{self.url}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {"temperature": temperature},
                    },
                )
                r.raise_for_status()
                response_data = r.json()
                return response_data.get("response", "")
            except httpx.HTTPStatusError as e:
                if e.response.status_code == 404:
                    raise RuntimeError(
                        f"Ollama endpoint not found. Is Ollama running at {self.url}? "
                        f"Or does the model '{self.model}' exist? "
                        f"Try: ollama pull {self.model}"
                    )
                elif e.response.status_code == 400:
                    error_detail = e.response.json().get("error", "Unknown error")
                    raise RuntimeError(f"Ollama API error: {error_detail}")
                else:
                    raise RuntimeError(
                        f"Ollama API error: HTTP {e.response.status_code} - {e.response.text}"
                    )
            except httpx.ConnectError:
                raise RuntimeError(
                    f"Cannot connect to Ollama at {self.url}. "
                    f"Is Ollama running? Start it with: ollama serve"
                )
            except Exception as e:
                raise RuntimeError(f"LLM call failed: {str(e)}")
        raise RuntimeError(f"Unsupported backend: {self.backend}")
    async def stream_call(self, prompt: str, temperature: float = 0.0) -> AsyncGenerator[str, None]:
        """Stream the LLM response token by token."""
        if self.backend == "ollama":
            if not self.url or not self.model:
                raise RuntimeError(f"LLM not configured: url={self.url}, model={self.model}")
            try:
                # Use a longer timeout here, since a full generation can take a while.
                async with httpx.AsyncClient(timeout=300.0) as client:
                    async with client.stream(
                        "POST",
                        f"{self.url}/api/generate",
                        json={
                            "model": self.model,
                            "prompt": prompt,
                            "stream": True,
                            "options": {"temperature": temperature},
                        },
                    ) as response:
                        response.raise_for_status()
                        # Ollama streams one JSON object per line.
                        async for line in response.aiter_lines():
                            if not line:
                                continue
                            try:
                                data = json.loads(line)
                                token = data.get("response", "")
                                if token:
                                    yield token
                                # Stop once Ollama signals the generation is complete.
                                if data.get("done", False):
                                    break
                            except json.JSONDecodeError:
                                continue
            except httpx.ConnectError:
                raise RuntimeError(
                    f"Cannot connect to Ollama at {self.url}. "
                    f"Is Ollama running? Start it with: ollama serve"
                )
            except Exception as e:
                raise RuntimeError(f"LLM streaming failed: {str(e)}")
        else:
            raise RuntimeError("Streaming not supported for this backend")