""" Ollama client wrapper for MiniMax-M2. Compatible with /api/generate streaming endpoint. """ import os import requests import json from typing import Dict OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://127.0.0.1:11434") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "minimax-m2:cloud") DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_TEMPERATURE", 0.2)) MAX_TOKENS = int(os.getenv("MAX_TOKENS", 1024)) def ask_ollama(prompt: str, extra: Dict = None) -> str: """Send a prompt to the local Ollama API and return the generated text.""" payload = { "model": OLLAMA_MODEL, "prompt": prompt, "options": { "temperature": DEFAULT_TEMPERATURE, "num_predict": MAX_TOKENS } } if extra: payload.update(extra) url = f"{OLLAMA_API_URL}/api/generate" resp = requests.post(url, json=payload, stream=True, timeout=180) resp.raise_for_status() response_text = "" for line in resp.iter_lines(): if not line: continue try: data = json.loads(line.decode("utf-8")) if "response" in data: response_text += data["response"] except Exception: continue return response_text.strip()