# -*- coding: utf-8 -*-
import os
import requests
# Overridable via environment variables (recommended: set them under Spaces → Settings → Variables)
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma2:9b")
TIMEOUT_S = float(os.environ.get("OLLAMA_TIMEOUT", "120"))

def _url(path: str) -> str:
    """Join the Ollama base URL with an API path."""
    return f"{OLLAMA_HOST}{path}"


def is_ollama_alive() -> bool:
    """Return True if the Ollama server answers a quick health check."""
    try:
        r = requests.get(_url("/api/tags"), timeout=5)
        return r.ok
    except Exception:
        return False


def list_models() -> list[str]:
    """Return the names of models installed on the Ollama server ([] on any failure)."""
    try:
        r = requests.get(_url("/api/tags"), timeout=10)
        r.raise_for_status()
        data = r.json()
        return [m.get("name") for m in data.get("models", [])]
    except Exception:
        return []


def chat_blocking(prompt: str,
                  system: str | None = None,
                  model: str | None = None,
                  temperature: float = 0.7,
                  max_tokens: int | None = None) -> str:
    """Send one non-streaming chat request and return the reply text."""
    if model is None:
        model = OLLAMA_MODEL
    payload = {
        "model": model,
        "messages": [],
        "stream": False,
        "options": {"temperature": temperature},
    }
    if system:
        payload["messages"].append({"role": "system", "content": system})
    payload["messages"].append({"role": "user", "content": prompt})
    if max_tokens is not None:
        # Ollama caps generation length via the num_predict option.
        payload["options"]["num_predict"] = max_tokens
    r = requests.post(_url("/api/chat"), json=payload, timeout=TIMEOUT_S)
    r.raise_for_status()
    data = r.json()
    return data.get("message", {}).get("content", "")
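

# Minimal usage sketch (an assumption, not part of the original module): gate the
# request on a health check, list what the server has pulled, then send one
# blocking chat call. Assumes an Ollama server is reachable at OLLAMA_HOST and
# that the OLLAMA_MODEL tag has already been pulled.
if __name__ == "__main__":
    if not is_ollama_alive():
        raise SystemExit(f"No Ollama server reachable at {OLLAMA_HOST}")
    print("available models:", list_models())
    reply = chat_blocking("Say hello in one sentence.",
                          system="You are a concise assistant.",
                          max_tokens=64)
    print(reply)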