# rag-system/ollama_client.py
# Commit 717ab4c (Jainish1808): Add endpoint fallback logic for Ollama Cloud requests
import os
import requests
from dotenv import load_dotenv
import logging
from typing import List
# Load environment variables from a local .env file (no-op if absent).
load_dotenv()
# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)
DEFAULT_OLLAMA_ENDPOINTS = [
"https://api.ollama.com",
"https://cloud.ollama.com"
]
ENV_OLLAMA_URL = os.getenv("OLLAMA_CLOUD_URL")
def _candidate_endpoints() -> List[str]:
"""Return ordered list of Ollama Cloud base URLs to try."""
endpoints: List[str] = []
if ENV_OLLAMA_URL:
endpoints.append(ENV_OLLAMA_URL.rstrip("/"))
for url in DEFAULT_OLLAMA_ENDPOINTS:
normalized = url.rstrip("/")
if normalized not in endpoints:
endpoints.append(normalized)
return endpoints
# API key for Ollama Cloud, read once at import time. May be None; its
# presence is checked in generate_from_ollama before any request is made.
OLLAMA_API_KEY = os.getenv("OLLAMA_API_KEY")
def generate_from_ollama(model: str, prompt: str, max_tokens: int = 512, stream: bool = False) -> str:
    """
    Call Ollama Cloud's OpenAI-compatible chat endpoint and return the
    generated text for `prompt` using `model`.

    Each candidate base URL is tried in order; the first successful
    completion is returned. Per-endpoint failures are logged and collected
    so the final error shows every attempt.

    Args:
        model: Model identifier to request.
        prompt: User prompt, sent as a single chat message.
        max_tokens: Generation cap forwarded to the API.
        stream: Must be False; this helper parses a single JSON body and
            cannot consume a streamed (SSE) response.

    Returns:
        The assistant message content, or "" if the response carried none.

    Raises:
        ValueError: If `stream` is True.
        RuntimeError: If OLLAMA_API_KEY is unset, or every endpoint fails.
    """
    if not OLLAMA_API_KEY:
        raise RuntimeError(
            "Missing OLLAMA_API_KEY environment variable. "
            "Please set it in your Hugging Face Space settings under 'Settings > Variables and secrets'"
        )
    if stream:
        # resp.json() below cannot parse a streamed response; fail fast with
        # a clear message instead of a confusing JSON decode error.
        raise ValueError("generate_from_ollama does not support stream=True")
    headers = {
        "Authorization": f"Bearer {OLLAMA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "stream": stream
    }
    errors = []
    for base_url in _candidate_endpoints():
        url = f"{base_url}/v1/chat/completions"
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=120)
            resp.raise_for_status()
            # ValueError also covers json.JSONDecodeError on malformed bodies,
            # so a bad response from one endpoint still falls through to the next.
            data = resp.json()
        except (requests.exceptions.RequestException, ValueError) as exc:
            error_msg = f"{base_url}: {exc}"
            errors.append(error_msg)
            # Lazy %-formatting so the message is only built if emitted.
            logger.error("Ollama API request failed: %s", error_msg)
            continue
        # `or [{}]` guards against an explicit empty "choices" list, which
        # would otherwise raise IndexError.
        choices = data.get("choices") or [{}]
        return choices[0].get("message", {}).get("content", "")
    raise RuntimeError(
        "Failed to generate response from Ollama Cloud. Attempts: " + "; ".join(errors)
    )