"""
echo/llm/client.py
------------------
A thin LLM interface with two implementations:

* MockLLM   — deterministic, dependency-free. Lets the WHOLE agentic pipeline
              run and be tested without a GPU. It returns plausible structured
              JSON so the orchestrator, agents, tools, and tree all exercise
              their real code paths.
* LocalLLM  — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). Lazily
              imports torch/transformers so importing this module is cheap.

Every agent talks to an LLMClient, never to transformers directly, so swapping
the 14B vs the ≤4B model (the Tiny Titan experiment) is a one-line change.
"""

from __future__ import annotations

import json
import hashlib
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class LLMConfig:
    """Settings read by LocalLLM when loading a model and generating text."""

    # HuggingFace model identifier handed to from_pretrained() for both the
    # tokenizer and the model.
    model_name: str = "Qwen/Qwen2.5-3B-Instruct"
    # Passed to generate() as max_new_tokens.
    max_new_tokens: int = 512
    # Sampling temperature passed to generate().
    temperature: float = 0.9
    # Device placement passed as device_map when the model is loaded.
    device: str = "cuda"
    # Name of a torch dtype attribute; resolved with getattr(torch, dtype).
    dtype: str = "bfloat16"


class LLMClient(ABC):
    """Minimal text-completion interface that every backend implements."""

    @abstractmethod
    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return the reply text for a system prompt plus a user prompt."""

    def complete_json(self, system: str, user: str) -> dict:
        """Complete with json_mode=True and return the reply parsed by _safe_json."""
        return _safe_json(self.complete(system, user, json_mode=True))


def _safe_json(text: str) -> dict:
    try:
        start = text.index("{")
        end = text.rindex("}") + 1
        return json.loads(text[start:end])
    except (ValueError, json.JSONDecodeError):
        return {}


# --------------------------------------------------------------------- mock
class MockLLM(LLMClient):
    """
    Offline, reproducible substitute for a real model.

    Every reply is a JSON string whose contents are drawn from a PRNG seeded
    by a hash of the prompt (first 40 characters of the system prompt plus
    the full user prompt) and ``seed``. The same prompt therefore always
    produces the same reply. The reply's shape is chosen by the role that
    _detect_role finds in the system prompt.
    """

    _CITIES = [
        "Lisbon", "Tokyo", "Berlin", "São Paulo",
        "Reykjavik", "Montreal", "Nairobi", "Hanoi",
    ]
    _JOBS = [
        "marine biologist", "bakery owner", "session guitarist", "ER nurse",
        "patent lawyer", "documentary editor", "high-school teacher",
        "startup founder",
    ]
    _FEELINGS = [
        "restless pride", "quiet grief", "stubborn hope",
        "weary contentment", "sharp loneliness", "fierce joy",
    ]
    _SCARS = [
        "a friendship that never healed",
        "the move that cost you a parent",
        "a business that folded",
        "a love you let leave",
    ]
    _TRIUMPHS = [
        "a book finally finished",
        "a child who adores you",
        "a city that became home",
        "a fear you outgrew",
    ]

    def __init__(self, seed: int = 0):
        self.seed = seed

    def _rng(self, *parts: str) -> random.Random:
        """Build a PRNG keyed on the given strings and this instance's seed."""
        key = "|".join(parts) + str(self.seed)
        digest = hashlib.sha256(key.encode()).hexdigest()
        # Only the leading 8 hex digits (32 bits) of the digest seed the PRNG.
        return random.Random(int(digest[:8], 16))

    def _curator_payload(self, rng: random.Random) -> dict:
        """Draw one life-fragment record; draw order fixes the output."""
        age = rng.randint(28, 52)
        location = rng.choice(self._CITIES)
        occupation = rng.choice(self._JOBS)
        relationship = rng.choice(
            ["married", "newly single", "in a long-distance love"])
        dependents = rng.choice([[], ["a daughter, 6"], ["a son, 11"]])
        scar = rng.choice(self._SCARS)
        triumph = rng.choice(self._TRIUMPHS)
        possession = rng.choice(
            ["a secondhand piano", "a dog named Argo", "a balcony of herbs"])
        valence = round(rng.uniform(-0.8, 0.8), 2)
        feeling = rng.choice(self._FEELINGS)
        voice_hint = rng.choice(
            ["slow and warm", "clipped, tired", "bright, breathless"])
        return {
            "age": age,
            "location": location,
            "occupation": occupation,
            "relationships": [relationship],
            "dependents": dependents,
            "scars": [scar],
            "triumphs": [triumph],
            "possessions": [possession],
            "valence": valence,
            "dominant_feeling": feeling,
            "voice_hint": voice_hint,
            "summary": "You wake in a life that turned on a single choice.",
            "voice_line": "I still think about the version of us that stayed.",
        }

    def _screenwriter_payload(self, rng: random.Random) -> dict:
        """Draw two forks, one from each pool of choices."""
        first_fork = rng.choice(
            ["take the offer abroad", "stay for someone sick",
             "sell everything and travel", "say yes to the proposal"])
        second_fork = rng.choice(
            ["walk away from it all", "bet the savings on a dream",
             "reconcile with an old enemy", "have the child"])
        return {"forks": [first_fork, second_fork]}

    def _verifier_payload(self, rng: random.Random) -> dict:
        """Draw a verdict that passes unless the draw is at or below 0.15."""
        ok = rng.random() > 0.15
        reason = "" if ok else "age contradicts parent"
        return {"consistent": ok, "reason": reason}

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """
        Return a JSON string shaped for the role found in ``system``.

        ``json_mode`` is accepted for interface compatibility and not used:
        every reply is JSON.
        """
        rng = self._rng(system[:40], user)
        role = _detect_role(system)

        if role == "curator":
            payload = self._curator_payload(rng)
        elif role == "screenwriter":
            payload = self._screenwriter_payload(rng)
        elif role == "verifier":
            payload = self._verifier_payload(rng)
        else:
            payload = {"text": "…"}
        return json.dumps(payload)


def _detect_role(system: str) -> str:
    s = system.lower()
    if "curator" in s:
        return "curator"
    if "screenwriter" in s or "fork" in s:
        return "screenwriter"
    if "verifier" in s or "consisten" in s:
        return "verifier"
    return "generic"


# -------------------------------------------------------------------- local
class LocalLLM(LLMClient):
    """
    Real model backed by a HuggingFace causal LM.

    torch and transformers are imported inside the methods, so constructing
    a LocalLLM does not pull in the heavy dependencies. The weights are
    loaded by load(), which complete() calls on first use when needed.
    """

    def __init__(self, cfg: LLMConfig):
        self.cfg = cfg
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Load the tokenizer and model named by ``cfg.model_name``."""
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.cfg.model_name,
            dtype=getattr(torch, self.cfg.dtype),
            device_map=self.cfg.device,
        )

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """
        Generate a reply to ``system`` + ``user`` with the chat template.

        Loads the model first when load() has not been called yet. Samples
        at ``cfg.temperature`` when it is positive and decodes greedily
        otherwise. ``json_mode`` is accepted for interface compatibility and
        does not alter generation here.

        Returns only the newly generated text, with special tokens removed.
        """
        import torch

        # Load on demand so calling complete() before load() does not fail
        # on a None tokenizer.
        if self.model is None or self.tokenizer is None:
            self.load()

        msgs = [{"role": "system", "content": system},
                {"role": "user", "content": user}]
        # Follow the model's actual device instead of cfg.device: cfg.device
        # doubles as device_map, which may be a value such as "auto" that is
        # not a valid target for .to().
        inputs = self.tokenizer.apply_chat_template(
            msgs, add_generation_prompt=True, return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        prompt_len = inputs["input_ids"].shape[1]

        gen_kwargs = {
            "max_new_tokens": self.cfg.max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if self.cfg.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = self.cfg.temperature
        else:
            # Sampling requires a strictly positive temperature; treat
            # zero or negative as a request for deterministic greedy decoding.
            gen_kwargs["do_sample"] = False

        with torch.no_grad():
            out = self.model.generate(**inputs, **gen_kwargs)
        # Drop the prompt tokens so only the completion is decoded.
        return self.tokenizer.decode(out[0, prompt_len:],
                                     skip_special_tokens=True)