Spaces:
Sleeping
Sleeping
File size: 1,763 Bytes
63bcd5a 4552666 63bcd5a 4552666 63bcd5a 4552666 63bcd5a 4552666 63bcd5a 4552666 63bcd5a 4552666 63bcd5a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | import requests
import time
# Endpoint that generate_local() POSTs its request to (an Ollama server
# on this machine, per the function's docstring).
OLLAMA_URL = "http://localhost:11434/api/generate"
# Value sent as the "model" field of every request payload.
MODEL_NAME = "mistral"
def generate_local(prompt: str, retries: int = 2) -> str:
    """Request a completion from the local Ollama server.

    The prompt is truncated to its first 1200 characters and sent as one
    non-streaming POST to ``OLLAMA_URL`` for the model ``MODEL_NAME``.
    An attempt counts as failed when the HTTP status is not 200, the body
    is not valid JSON, the ``"response"`` field is missing, not a string,
    or shorter than 10 characters once stripped, the request times out
    (60 s), or any other exception is raised during the attempt.

    Args:
        prompt: Text to send to the model.
        retries: Total number of attempts to make (not the number of
            extra retries). With 0 or less, no request is sent.

    Returns:
        The stripped model reply, or ``""`` if every attempt failed.
        Failures are reported with ``print`` and never raised.
    """
    payload = {
        "model": MODEL_NAME,
        "prompt": prompt[:1200],
        "stream": False,
        "options": {
            "temperature": 0.7,
            "num_predict": 150,
            "top_p": 0.9,
        },
    }

    for attempt in range(retries):
        if attempt:
            # Back off only *between* attempts: sleeping after the final
            # failure would just delay the fallback return.
            time.sleep(1)

        try:
            # perf_counter is monotonic, so the reported duration cannot
            # be skewed by wall-clock adjustments.
            started = time.perf_counter()
            response = requests.post(OLLAMA_URL, json=payload, timeout=60)
            duration = round(time.perf_counter() - started, 2)

            if response.status_code != 200:
                print(f"❌ Local status {response.status_code}")
                continue

            try:
                data = response.json()
            except ValueError:
                # JSON decoding errors are ValueError subclasses.
                print("❌ Invalid JSON from local")
                continue

            # Treat a non-object body or a non-string "response" field as
            # an empty reply instead of relying on an AttributeError
            # being swallowed by the catch-all handler below.
            raw = data.get("response", "") if isinstance(data, dict) else ""
            result = raw.strip() if isinstance(raw, str) else ""

            if len(result) < 10:
                print(f"⚠️ Empty/short response ({duration}s)")
                continue

            print(f"✅ Local OK ({duration}s)")
            return result

        except requests.exceptions.Timeout:
            print(f"⏱️ Timeout ({attempt+1}/{retries})")
        except Exception as e:
            # Deliberate best-effort boundary: any failure (connection
            # refused, unexpected payload, ...) leads to a retry and,
            # ultimately, the empty-string result.
            print("❌ Local error:", e)

    print("🚨 Local LLM failed → fallback")
    return ""
|