# llm_clients/ollama.py
import json
from typing import Any, Dict, Generator

import requests  # Example: Using the requests library

from .base import LlmClient
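
# NOTE: this module assumes the LlmClient base class (not shown here) stores
# config_dict as self.config and system_prompt as self.system_prompt; that is
# inferred from the usage below, so adjust if your base class differs.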


class OllamaClient(LlmClient):
    """LLM client for Ollama models."""

    def __init__(self, config_dict: Dict[str, Any], system_prompt: str):
        super().__init__(config_dict, system_prompt)
        # Example: Validate that the Ollama host is reachable
        try:
            response = requests.get(self.config['host'])
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise ConnectionError(
                f"Could not connect to Ollama host at {self.config['host']}. "
                "Is Ollama running?"
            ) from e
        print(
            f"✅ Ollama Client initialized for model '{self.config['model']}' "
            f"at host '{self.config['host']}'."
        )

    def generate_content(self, prompt: str) -> str:
        """Generates a non-streaming response from Ollama."""
        # See Ollama REST API documentation: https://github.com/ollama/ollama/blob/main/docs/api.md
        full_prompt = f"{self.system_prompt}\n\nUser: {prompt}"
        payload = {
            "model": self.config['model'],
            "prompt": full_prompt,
            "stream": False,
        }
        response = requests.post(f"{self.config['host']}/api/generate", json=payload)
        response.raise_for_status()
        # Ollama returns a single JSON object; the generated text is under
        # the "response" key.
        return response.json().get("response", "")
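
    # Example (alternative, as a sketch): Ollama's /api/chat endpoint accepts
    # a system-role message directly, which avoids the manual prompt
    # concatenation above. Roughly:
    #
    #   payload = {
    #       "model": self.config['model'],
    #       "messages": [
    #           {"role": "system", "content": self.system_prompt},
    #           {"role": "user", "content": prompt},
    #       ],
    #       "stream": False,
    #   }
    #   response = requests.post(f"{self.config['host']}/api/chat", json=payload)
    #   return response.json()["message"]["content"]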

    def generate_content_stream(self, prompt: str) -> Generator[Any, None, None]:
        """Generates a streaming response from Ollama, yielding text chunks."""
        # See Ollama REST API documentation for streaming: https://github.com/ollama/ollama/blob/main/docs/api.md
        full_prompt = f"{self.system_prompt}\n\nUser: {prompt}"
        payload = {
            "model": self.config['model'],
            "prompt": full_prompt,
            "stream": True,
        }
        try:
            with requests.post(f"{self.config['host']}/api/generate", json=payload, stream=True) as response:
                response.raise_for_status()
                # Ollama streams newline-delimited JSON objects, one per chunk.
                for line in response.iter_lines():
                    if line:
                        chunk = json.loads(line)
                        yield chunk.get("response", "")
        except requests.exceptions.RequestException as e:
            print(f"Error during Ollama stream: {e}")
            raise
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from Ollama stream: {e}")
            raise

    def _generate_content_impl(self, prompt: str) -> str:
        """Implementation for base class compatibility."""
        return self.generate_content(prompt)

    def _generate_content_stream_impl(self, prompt: str) -> Generator[Any, None, None]:
        """Implementation for base class compatibility."""
        return self.generate_content_stream(prompt)
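

# Minimal usage sketch. Assumptions: a local Ollama server on its default
# port 11434 and a pulled "llama3" model (both placeholders); 'host' and
# 'model' are the config keys this client reads above. Because of the
# relative import, run it as a module: python -m llm_clients.ollama
if __name__ == "__main__":
    client = OllamaClient(
        {"host": "http://localhost:11434", "model": "llama3"},
        system_prompt="You are a helpful assistant.",
    )
    for text_chunk in client.generate_content_stream("Why is the sky blue?"):
        print(text_chunk, end="", flush=True)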