import requests import time OLLAMA_URL = "http://localhost:11434/api/generate" MODEL_NAME = "mistral" def generate_local(prompt: str, retries: int = 2) -> str: """ Stable + fast local LLM call (Ollama) """ prompt = prompt[:1200] payload = { "model": MODEL_NAME, "prompt": prompt, "stream": False, "options": { "temperature": 0.7, "num_predict": 150, "top_p": 0.9 } } for attempt in range(retries): try: start_time = time.time() response = requests.post( OLLAMA_URL, json=payload, timeout=60 ) duration = round(time.time() - start_time, 2) if response.status_code != 200: print(f"❌ Local status {response.status_code}") time.sleep(1) continue try: data = response.json() except Exception: print("❌ Invalid JSON from local") time.sleep(1) continue result = data.get("response", "").strip() if not result or len(result) < 10: print(f"⚠️ Empty/short response ({duration}s)") time.sleep(1) continue print(f"✅ Local OK ({duration}s)") return result except requests.exceptions.Timeout: print(f"⏱️ Timeout ({attempt+1}/{retries})") time.sleep(1) except Exception as e: print("❌ Local error:", e) time.sleep(1) print("🚨 Local LLM failed → fallback") return ""