File size: 1,257 Bytes
e1d6e8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
"""
Ollama client wrapper for MiniMax-M2.
Compatible with /api/generate streaming endpoint.
"""
import json
import os
from typing import Dict, Optional

import requests
# Connection and generation defaults; each is overridable via environment variable.
OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://127.0.0.1:11434")  # base URL of the Ollama server
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "minimax-m2:cloud")  # model tag sent to /api/generate
# NOTE: os.getenv returns its fallback verbatim (it is not coerced to str),
# so the defaults must be strings for float()/int() to be type-correct.
DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_TEMPERATURE", "0.2"))  # sampling temperature
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "1024"))  # Ollama "num_predict" cap
def ask_ollama(prompt: str, extra: Optional[Dict] = None) -> str:
    """Send *prompt* to the local Ollama /api/generate endpoint and return the generated text.

    Ollama streams one JSON object per line; this accumulates every
    ``"response"`` fragment and stops at the terminal ``"done": true`` object.

    Args:
        prompt: The prompt text to send to the model.
        extra: Optional top-level payload overrides (merged into the request
            body; colliding keys such as "model" or "options" are clobbered).

    Returns:
        The concatenated model output with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server responds with a non-2xx status.
        requests.RequestException: On connection failure or timeout.
    """
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "options": {
            "temperature": DEFAULT_TEMPERATURE,
            "num_predict": MAX_TOKENS,
        },
    }
    if extra:
        payload.update(extra)
    url = f"{OLLAMA_API_URL}/api/generate"
    chunks = []
    # Context manager guarantees the streaming connection is released even if
    # iteration raises mid-stream (a bare stream=True response holds the socket).
    with requests.post(url, json=payload, stream=True, timeout=180) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:  # keep-alive blank lines between JSON objects
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best-effort: skip malformed lines rather than aborting the
                # whole generation. (Narrowed from a bare `except Exception`,
                # which also hid real programming errors.)
                continue
            if "response" in data:
                chunks.append(data["response"])
            if data.get("done"):
                # Final stream object; the server sends nothing useful after it.
                break
    # join() avoids the quadratic cost of repeated `str +=` accumulation.
    return "".join(chunks).strip()
|