#!/usr/bin/env python3
"""
Thanatos-27B — Ollama chat examples.

Prerequisites (pick one):

    A. From the bundled GGUFs (default flow):
        $ make build                     # uses Thanatos-27B.Q4_K_M.gguf
        # or:
        $ ollama create thanatos-27b -f ../Modelfile

    B. Pull straight from HF (Q4_K_M is the only bundled quant):
        $ ollama run hf.co/FoolDev/Thanatos-27B
        # then run this script with MODEL=hf.co/FoolDev/Thanatos-27B in the environment

Then:
    $ ollama serve         # usually already running
    $ python ollama_chat.py

The model emits <think>...</think> reasoning blocks before its answer.
Current Ollama (0.24, especially with `OLLAMA_NEW_ENGINE=1`) returns the
reasoning in a separate `message.thinking` field and keeps `content`
clean. Older builds put the whole `<think>...</think>` block inside
`content`. The demo below reads `message.thinking` first and falls
back to parsing `<think>` tags out of `content` so it works against
either path.

Endpoints used:
    - Native Ollama:  http://localhost:11434/api/chat
    - OpenAI-compat:  http://localhost:11434/v1/chat/completions
"""
from __future__ import annotations

import json
import os
import re
import sys
from typing import Any, Iterator

import requests

# Both settings can be overridden from the environment. `or` is used instead
# of a `.get()` default so that an empty value (e.g. `MODEL=`) also falls
# back to the built-in default rather than being sent as an empty string.
MODEL = os.environ.get("MODEL") or "thanatos-27b"
# Trailing slashes are trimmed because every request URL in this file is
# built as f"{HOST}/...", which would otherwise contain a doubled slash.
HOST = (os.environ.get("HOST") or "http://localhost:11434").rstrip("/")

# Matches one complete reasoning block plus the whitespace that follows it;
# used to delete the block from the visible answer.
_THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
# Same block, but capturing only the text between the tags.
_THINK_BODY_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)


def split_thinking(content: str) -> tuple[str, str]:
    """Return ``(thinking, final_answer)`` from a content string.

    Every complete ``<think>...</think>`` block is removed from the answer
    and its body collected; multiple bodies are joined with newlines.

    Two malformed shapes are handled as well, so that reasoning never ends
    up in the answer:

    * a closing tag with no opening tag: the text before it is treated as
      reasoning;
    * an opening tag with no closing tag (e.g. a reply cut off mid-thought):
      the text after it is treated as reasoning.

    Both returned strings are stripped of surrounding whitespace.
    """
    parts = _THINK_BODY_RE.findall(content)
    answer = _THINK_RE.sub("", content)

    # Orphan closing tag: whatever precedes it is reasoning, not answer.
    head, close_tag, tail = answer.partition("</think>")
    if close_tag:
        answer = tail
        if head.strip():
            parts.insert(0, head)

    # Orphan opening tag: whatever follows it is unfinished reasoning.
    head, open_tag, tail = answer.partition("<think>")
    if open_tag:
        answer = head
        if tail.strip():
            parts.append(tail)

    thinking = "\n".join(p.strip() for p in parts).strip()
    return thinking, answer.strip()


# ---------- 1. Simple chat ----------

def chat(prompt: str, system: str | None = None) -> dict[str, Any]:
    """Send one non-streaming request to the native chat endpoint.

    Args:
        prompt: Text of the user turn.
        system: Optional system prompt; when truthy it is placed ahead of
            the user turn.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    conversation: list[dict[str, Any]] = [{"role": "user", "content": prompt}]
    if system:
        conversation.insert(0, {"role": "system", "content": system})

    payload = {"model": MODEL, "messages": conversation, "stream": False}
    response = requests.post(f"{HOST}/api/chat", json=payload, timeout=600)
    response.raise_for_status()
    return response.json()


# ---------- 2. Streaming ----------

def chat_stream(prompt: str) -> Iterator[str]:
    """Yield content fragments of the reply as they arrive.

    The request is made with ``"stream": True`` and the response body is
    read line by line, each non-empty line being decoded as one JSON object.
    Iteration stops at the first object whose ``done`` field is truthy.

    Args:
        prompt: Text of the single user turn.

    Yields:
        The ``message.content`` string of each streamed object that has one.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if a streamed object carries an ``error`` field.
    """
    with requests.post(
        f"{HOST}/api/chat",
        json={
            "model": MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
        },
        stream=True,
        timeout=600,
    ) as r:
        r.raise_for_status()
        for line in r.iter_lines():
            if not line:
                continue
            chunk = json.loads(line)
            # An error object has neither "message" nor "done"; without this
            # check it would be skipped and the stream would just end early.
            if "error" in chunk:
                raise RuntimeError(f"Ollama stream error: {chunk['error']}")
            if "message" in chunk and "content" in chunk["message"]:
                yield chunk["message"]["content"]
            if chunk.get("done"):
                break


# ---------- 3. Tool calling ----------

# JSON-Schema description of the arguments accepted by the weather function.
_WEATHER_PARAMETERS = {
    "type": "object",
    "properties": {
        "city": {"type": "string", "description": "City name"},
        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
    },
    "required": ["city", "unit"],
}

# Tool definition sent in the "tools" list of the chat request.
WEATHER_TOOL = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given city",
        "parameters": _WEATHER_PARAMETERS,
    },
}


def fake_weather(city: str, unit: str) -> str:
    """Return a canned weather report for *city* as a JSON string.

    No lookup is performed: the temperature and conditions are fixed, and
    *city* and *unit* are echoed back unchanged.
    """
    report = {
        "city": city,
        "temperature": 14,
        "unit": unit,
        "conditions": "light rain",
    }
    return json.dumps(report)


def _post_tool_chat(history: list[dict[str, Any]]) -> dict[str, Any]:
    """POST *history* with the weather tool attached; return the reply message."""
    r = requests.post(
        f"{HOST}/api/chat",
        json={
            "model": MODEL,
            "messages": history,
            "tools": [WEATHER_TOOL],
            "stream": False,
        },
        timeout=600,
    )
    r.raise_for_status()
    return r.json()["message"]


def _run_tool_call(fn: dict[str, Any]) -> str:
    """Run one requested function call and return its result as a JSON string."""
    name = fn["name"]
    if name != "get_current_weather":
        return json.dumps({"error": f"unknown tool {name}"})
    try:
        # The arguments are model-generated: keys may be missing, extra, or
        # the whole field may be null. Report that back to the model as a
        # tool error instead of crashing with a TypeError.
        return fake_weather(**(fn.get("arguments") or {}))
    except TypeError as exc:
        return json.dumps({"error": f"bad arguments for {name}: {exc}"})


def tool_round_trip(prompt: str) -> str:
    """Single-shot tool call: model -> tool -> model -> final answer.

    Sends *prompt* with the weather tool attached. If the reply contains no
    tool calls, its content is returned directly. Otherwise each requested
    call is executed, the results are appended to the conversation as
    ``tool`` messages, and the conversation is sent once more; the content
    of that second reply is returned.

    Raises:
        requests.HTTPError: if either request gets an error status.
    """
    history: list[dict[str, Any]] = [{"role": "user", "content": prompt}]
    msg = _post_tool_chat(history)

    tool_calls = msg.get("tool_calls")
    if not tool_calls:
        return msg["content"]

    # Replay the assistant turn with its text as well as its tool calls, so
    # anything the model said alongside the calls stays in the context.
    history.append(
        {
            "role": "assistant",
            "content": msg.get("content", ""),
            "tool_calls": tool_calls,
        }
    )
    for tc in tool_calls:
        fn = tc["function"]
        history.append(
            {"role": "tool", "tool_name": fn["name"], "content": _run_tool_call(fn)}
        )

    return _post_tool_chat(history)["content"]


# ---------- 4. OpenAI-compatible endpoint ----------

def openai_chat(prompt: str) -> str:
    """Ask one question through the OpenAI-compatible endpoint.

    Args:
        prompt: Text of the single user turn.

    Returns:
        The message content of the first choice in the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    body = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.6,
    }
    reply = requests.post(f"{HOST}/v1/chat/completions", json=body, timeout=600)
    reply.raise_for_status()
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"]


# ---------- demo ----------

def _demo() -> None:
    """Run the four examples in order, printing each result to stdout."""
    print("=== 1. simple chat ===")
    message = chat("What is 84 * 3 / 2?")["message"]
    # Reasoning may arrive in its own `thinking` field or inlined in
    # `content` as <think>...</think>; try the field first, then fall back
    # to parsing the tags out of the content.
    reasoning = (message.get("thinking") or "").strip()
    reply = message.get("content", "")
    if not reasoning:
        reasoning, reply = split_thinking(reply)
    if reasoning:
        print(f"[thinking] {reasoning[:200]}...")
    print(f"[answer]   {reply}")

    print("\n=== 2. streaming ===")
    # Flush after every fragment so the text appears as it is generated.
    for piece in chat_stream("Count from 1 to 5 in one line."):
        print(piece, end="", flush=True)
    print()

    print("\n=== 3. tool round-trip ===")
    weather_answer = tool_round_trip("What is the weather in Paris in celsius?")
    print(weather_answer)

    print("\n=== 4. OpenAI-compat ===")
    compat_answer = openai_chat("Say 'OpenAI endpoint OK' and nothing else.")
    print(compat_answer)


if __name__ == "__main__":
    _demo()