# rag-system/ollama_client.py
# Commit 717ab4c (Jainish1808): Add endpoint fallback logic for Ollama Cloud requests
import os
import requests
from dotenv import load_dotenv
import logging
from typing import List
# Load environment variables from a local .env file (no-op if absent).
load_dotenv()
# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)
DEFAULT_OLLAMA_ENDPOINTS = [
"https://api.ollama.com",
"https://cloud.ollama.com"
]
ENV_OLLAMA_URL = os.getenv("OLLAMA_CLOUD_URL")
def _candidate_endpoints() -> List[str]:
"""Return ordered list of Ollama Cloud base URLs to try."""
endpoints: List[str] = []
if ENV_OLLAMA_URL:
endpoints.append(ENV_OLLAMA_URL.rstrip("/"))
for url in DEFAULT_OLLAMA_ENDPOINTS:
normalized = url.rstrip("/")
if normalized not in endpoints:
endpoints.append(normalized)
return endpoints
# API key for Ollama Cloud, read once at import time. May be None; its
# presence is checked in generate_from_ollama before any request is made.
OLLAMA_API_KEY = os.getenv("OLLAMA_API_KEY")
def generate_from_ollama(model: str, prompt: str, max_tokens: int = 512, stream: bool = False) -> str:
    """
    Call Ollama Cloud's OpenAI-compatible chat endpoint and return the
    generated text for `prompt` using `model`.

    Each candidate base URL is tried in order; the first successful
    completion is returned. Per-endpoint failures are logged and collected
    so the final error shows every attempt.

    Args:
        model: Model identifier to request.
        prompt: User prompt, sent as a single chat message.
        max_tokens: Generation cap forwarded to the API.
        stream: Must be False; this helper parses a single JSON body and
            cannot consume a streamed (SSE) response.

    Returns:
        The assistant message content, or "" if the response carried none.

    Raises:
        ValueError: If `stream` is True.
        RuntimeError: If OLLAMA_API_KEY is unset, or every endpoint fails.
    """
    if not OLLAMA_API_KEY:
        raise RuntimeError(
            "Missing OLLAMA_API_KEY environment variable. "
            "Please set it in your Hugging Face Space settings under 'Settings > Variables and secrets'"
        )
    if stream:
        # resp.json() below cannot parse a streamed response; fail fast with
        # a clear message instead of a confusing JSON decode error.
        raise ValueError("generate_from_ollama does not support stream=True")
    headers = {
        "Authorization": f"Bearer {OLLAMA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "stream": stream
    }
    errors = []
    for base_url in _candidate_endpoints():
        url = f"{base_url}/v1/chat/completions"
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=120)
            resp.raise_for_status()
            # ValueError also covers json.JSONDecodeError on malformed bodies,
            # so a bad response from one endpoint still falls through to the next.
            data = resp.json()
        except (requests.exceptions.RequestException, ValueError) as exc:
            error_msg = f"{base_url}: {exc}"
            errors.append(error_msg)
            # Lazy %-formatting so the message is only built if emitted.
            logger.error("Ollama API request failed: %s", error_msg)
            continue
        # `or [{}]` guards against an explicit empty "choices" list, which
        # would otherwise raise IndexError.
        choices = data.get("choices") or [{}]
        return choices[0].get("message", {}).get("content", "")
    raise RuntimeError(
        "Failed to generate response from Ollama Cloud. Attempts: " + "; ".join(errors)
    )