Spaces:
Runtime error
Runtime error
File size: 1,809 Bytes
c332c52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import os
# Default: Hugging Face or Ollama fallback
def run_llm(prompt: str) -> str:
    """
    Route *prompt* to an available LLM backend and return the generated text.

    Backend priority:
      1. Hugging Face Inference API — used if the ``HF_API_TOKEN`` env var
         is set; model selected via ``HF_MODEL`` (default
         ``mistralai/Mistral-7B-Instruct-v0.3``).
      2. Local Ollama server on ``localhost:11434`` — model selected via
         ``OLLAMA_MODEL`` (default ``llama2``).
      3. Static placeholder string if neither backend is reachable.

    Always returns a string; backend failures are reported inside the
    returned text rather than raised, so callers never see an exception.
    """
    # --- Hugging Face Inference API ---
    hf_token = os.getenv("HF_API_TOKEN")
    hf_model = os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
    if hf_token:
        try:
            import requests
            response = requests.post(
                f"https://api-inference.huggingface.co/models/{hf_model}",
                headers={"Authorization": f"Bearer {hf_token}"},
                json={"inputs": prompt},
                timeout=30,
            )
            if response.status_code == 200:
                # HF text-generation endpoint returns a list of candidates;
                # take the first one's generated text.
                return response.json()[0]["generated_text"]
            return f"⚠️ HF error {response.status_code}: {response.text}"
        except Exception as e:
            # Any failure (requests missing, network, bad payload) is
            # reported to the caller as text, matching the contract above.
            return f"⚠️ HF backend error: {e}"

    # --- Ollama (local) ---
    ollama_model = os.getenv("OLLAMA_MODEL", "llama2")
    try:
        import json
        import requests
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": ollama_model, "prompt": prompt},
            timeout=30,
        )
        # Ollama streams one JSON object per line. Parse each line with
        # json.loads instead of splitting on '"response":"', which broke
        # on escaped quotes/backslashes inside the generated text.
        text = ""
        for line in response.iter_lines():
            if not line:
                continue
            try:
                chunk = json.loads(line.decode("utf-8"))
            except ValueError:
                continue  # skip malformed or partial lines
            text += chunk.get("response", "")
        return text if text else "⚠️ Ollama returned no output"
    except Exception:
        # Deliberate best-effort: Ollama absent or unreachable -> fall
        # through to the static placeholder below.
        pass

    # --- Fallback static ---
    return "⚠️ No LLM backend configured. Please set HF_API_TOKEN or run Ollama."
|