import json
import os

import requests


# Default backend order: Hugging Face first, then Ollama fallback
def run_llm(prompt: str) -> str:
    """
    Route a prompt to an available LLM backend.
    - Prefers the Hugging Face Inference API if HF_API_TOKEN is set
    - Falls back to a local Ollama server (model from OLLAMA_MODEL, default "llama2")
    - Otherwise returns a static placeholder response
    """
    # Hugging Face Inference API
    hf_token = os.getenv("HF_API_TOKEN")
    hf_model = os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
    if hf_token:
        try:
            response = requests.post(
                f"https://api-inference.huggingface.co/models/{hf_model}",
                headers={"Authorization": f"Bearer {hf_token}"},
                json={"inputs": prompt},
                timeout=30,
            )
            if response.status_code == 200:
                return response.json()[0]["generated_text"]
            return f"⚠️ HF error {response.status_code}: {response.text}"
        except Exception as e:
            return f"⚠️ HF backend error: {e}"
    # Ollama local API (streams one JSON object per line)
    ollama_model = os.getenv("OLLAMA_MODEL", "llama2")
    try:
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": ollama_model, "prompt": prompt},
            timeout=30,
            stream=True,
        )
        text = ""
        for line in response.iter_lines():
            if line:
                # Each streamed line is a complete JSON object; accumulate its
                # "response" field instead of splitting the raw string
                chunk = json.loads(line.decode("utf-8"))
                text += chunk.get("response", "")
        return text if text else "⚠️ Ollama returned no output"
    except Exception:
        pass
    # Static fallback when no backend is reachable
    return "⚠️ No LLM backend configured. Please set HF_API_TOKEN or run Ollama."