"""Thin helpers for talking to a local Ollama HTTP server."""

import os
import json
import requests

# Base URL of the Ollama server; the trailing slash is stripped so that
# _url() can join paths with a plain concatenation.
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
# Model used when a caller does not pass one explicitly.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma2:9b")
# Timeout (seconds) for the blocking chat request.
TIMEOUT_S = float(os.environ.get("OLLAMA_TIMEOUT", "120"))
|
def _url(path: str) -> str:
    """Join *path* onto the configured Ollama base URL."""
    return OLLAMA_HOST + path
|
|
def is_ollama_alive() -> bool:
    """Return True when the Ollama server responds to /api/tags."""
    try:
        response = requests.get(_url("/api/tags"), timeout=5)
    except Exception:
        # Connection refused, DNS failure, timeout — all mean "not alive".
        return False
    return response.ok
|
|
def list_models():
    """Return the names of models installed on the server, or [] on any failure."""
    try:
        resp = requests.get(_url("/api/tags"), timeout=10)
        resp.raise_for_status()
        # Comprehension stays inside the try so any malformed payload
        # also falls back to the empty list, as before.
        names = [entry.get("name") for entry in resp.json().get("models", [])]
    except Exception:
        return []
    return names
|
|
def chat_blocking(prompt: str,
                  system: str | None = None,
                  model: str | None = None,
                  temperature: float = 0.7,
                  max_tokens: int | None = None) -> str:
    """Send one non-streaming chat request to Ollama and return the reply text.

    Parameters:
        prompt: user message content.
        system: optional system prompt prepended to the message list
            (an empty string is skipped, matching prior behavior).
        model: model name; defaults to OLLAMA_MODEL when None.
        temperature: sampling temperature, forwarded in "options".
        max_tokens: optional generation cap ("num_predict"). An explicit
            value of 0 is now forwarded (previously any falsy value was
            silently dropped).

    Returns:
        The assistant message content, or "" if the response lacks one.

    Raises:
        requests.HTTPError: on a non-2xx response (raise_for_status).
        requests.RequestException: on connection failures or timeouts.
    """
    if model is None:
        model = OLLAMA_MODEL

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model,
        "messages": messages,
        "stream": False,  # one-shot call: single JSON body, no chunked stream
        "options": {"temperature": temperature},
    }
    # Check `is not None` rather than truthiness so max_tokens=0 is honored.
    if max_tokens is not None:
        payload["options"]["num_predict"] = max_tokens

    r = requests.post(_url("/api/chat"), json=payload, timeout=TIMEOUT_S)
    r.raise_for_status()
    data = r.json()
    # Ollama's /api/chat response nests the text under message.content.
    return data.get("message", {}).get("content", "")
|
|