File size: 6,729 Bytes
897d5bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
echo/llm/client.py
------------------
A thin LLM interface with two implementations:

* MockLLM   — deterministic, dependency-free. Lets the WHOLE agentic pipeline
              run and be tested without a GPU. It returns plausible structured
              JSON so the orchestrator, agents, tools, and tree all exercise
              their real code paths.
* LocalLLM  — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). Lazy
              imports torch/transformers so importing this module is cheap.

Every agent talks to an LLMClient, never to transformers directly, so swapping
the 14B vs the ≤4B model (the Tiny Titan experiment) is a one-line change.
"""

from __future__ import annotations

import json
import hashlib
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class LLMConfig:
    """Settings consumed by LocalLLM when loading a model and generating text."""

    # Model identifier handed to the tokenizer and model ``from_pretrained`` calls.
    model_name: str = "Qwen/Qwen2.5-3B-Instruct"
    # Forwarded to ``generate`` as ``max_new_tokens``.
    max_new_tokens: int = 512
    # Forwarded to ``generate`` as the sampling ``temperature``.
    temperature: float = 0.9
    # Passed as ``device_map`` when the model is loaded.
    device: str = "cuda"
    # Name of a torch attribute; resolved with ``getattr(torch, dtype)`` at load time.
    dtype: str = "bfloat16"


class LLMClient(ABC):
    """Abstract text-completion interface shared by every LLM backend."""

    @abstractmethod
    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return the backend's reply to a system prompt plus a user prompt.

        ``json_mode`` is set to True by ``complete_json``; how (or whether) a
        backend reacts to it is up to the implementation.
        """

    def complete_json(self, system: str, user: str) -> dict:
        """Run ``complete`` in JSON mode and parse the reply with ``_safe_json``.

        ``_safe_json`` pulls a JSON object out of the surrounding text and
        yields an empty dict when nothing parseable is found.
        """
        return _safe_json(self.complete(system, user, json_mode=True))


def _safe_json(text: str) -> dict:
    try:
        start = text.index("{")
        end = text.rindex("}") + 1
        return json.loads(text[start:end])
    except (ValueError, json.JSONDecodeError):
        return {}


# --------------------------------------------------------------------- mock
class MockLLM(LLMClient):
    """
    Offline, deterministic LLMClient used in place of a real model.

    Every call builds a private ``random.Random`` from a SHA-256 hash of the
    prompt and the instance seed, so the same prompt always yields the same
    JSON string. The role returned by ``_detect_role(system)`` selects which
    payload shape is produced.
    """

    _CITIES = [
        "Lisbon", "Tokyo", "Berlin", "São Paulo",
        "Reykjavik", "Montreal", "Nairobi", "Hanoi",
    ]
    _JOBS = [
        "marine biologist", "bakery owner", "session guitarist", "ER nurse",
        "patent lawyer", "documentary editor", "high-school teacher",
        "startup founder",
    ]
    _FEELINGS = [
        "restless pride", "quiet grief", "stubborn hope",
        "weary contentment", "sharp loneliness", "fierce joy",
    ]
    _SCARS = [
        "a friendship that never healed",
        "the move that cost you a parent",
        "a business that folded",
        "a love you let leave",
    ]
    _TRIUMPHS = [
        "a book finally finished",
        "a child who adores you",
        "a city that became home",
        "a fear you outgrew",
    ]

    def __init__(self, seed: int = 0):
        self.seed = seed

    def _rng(self, *parts: str) -> random.Random:
        """Return a generator seeded from the joined parts plus ``self.seed``."""
        material = "|".join(parts) + str(self.seed)
        digest = hashlib.sha256(material.encode()).hexdigest()
        # Only the first 8 hex digits (32 bits) of the digest feed the seed.
        return random.Random(int(digest[:8], 16))

    def _curator_payload(self, rng: random.Random) -> dict:
        """Build the life-fragment dict; draws happen in key order."""
        age = rng.randint(28, 52)
        location = rng.choice(self._CITIES)
        occupation = rng.choice(self._JOBS)
        relationship = rng.choice(["married", "newly single",
                                   "in a long-distance love"])
        dependents = rng.choice([[], ["a daughter, 6"], ["a son, 11"]])
        scar = rng.choice(self._SCARS)
        triumph = rng.choice(self._TRIUMPHS)
        possession = rng.choice(["a secondhand piano", "a dog named Argo",
                                 "a balcony of herbs"])
        valence = round(rng.uniform(-0.8, 0.8), 2)
        feeling = rng.choice(self._FEELINGS)
        voice_hint = rng.choice(["slow and warm", "clipped, tired",
                                 "bright, breathless"])
        return {
            "age": age,
            "location": location,
            "occupation": occupation,
            "relationships": [relationship],
            "dependents": dependents,
            "scars": [scar],
            "triumphs": [triumph],
            "possessions": [possession],
            "valence": valence,
            "dominant_feeling": feeling,
            "voice_hint": voice_hint,
            "summary": "You wake in a life that turned on a single choice.",
            "voice_line": "I still think about the version of us that stayed.",
        }

    def _screenwriter_payload(self, rng: random.Random) -> dict:
        """Build a dict holding two fork options, one from each pool."""
        first = rng.choice(["take the offer abroad", "stay for someone sick",
                            "sell everything and travel",
                            "say yes to the proposal"])
        second = rng.choice(["walk away from it all",
                             "bet the savings on a dream",
                             "reconcile with an old enemy", "have the child"])
        return {"forks": [first, second]}

    def _verifier_payload(self, rng: random.Random) -> dict:
        """Build a verdict dict; flags an inconsistency when the draw is <= 0.15."""
        ok = rng.random() > 0.15
        reason = "" if ok else "age contradicts parent"
        return {"consistent": ok, "reason": reason}

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return a JSON string whose shape depends on the detected role.

        The generator is seeded from the first 40 characters of ``system``
        and the whole ``user`` prompt. ``json_mode`` is accepted for interface
        compatibility and not consulted.
        """
        rng = self._rng(system[:40], user)
        builders = {
            "curator": self._curator_payload,
            "screenwriter": self._screenwriter_payload,
            "verifier": self._verifier_payload,
        }
        build = builders.get(_detect_role(system))
        if build is None:
            # Any unrecognised role gets the placeholder reply.
            return json.dumps({"text": "…"})
        return json.dumps(build(rng))


def _detect_role(system: str) -> str:
    s = system.lower()
    if "curator" in s:
        return "curator"
    if "screenwriter" in s or "fork" in s:
        return "screenwriter"
    if "verifier" in s or "consisten" in s:
        return "verifier"
    return "generic"


# -------------------------------------------------------------------- local
class LocalLLM(LLMClient):
    """LLMClient backed by a HuggingFace causal language model.

    torch and transformers are imported inside the methods so that importing
    this module stays cheap. The model and tokenizer are created by ``load()``,
    which ``complete()`` calls on first use if it has not been called yet.
    """

    def __init__(self, cfg: LLMConfig):
        self.cfg = cfg
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Instantiate the tokenizer and model named by ``cfg.model_name``.

        ``cfg.dtype`` is resolved as an attribute of ``torch`` and
        ``cfg.device`` is passed through as the ``device_map``.
        """
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.cfg.model_name,
            dtype=getattr(torch, self.cfg.dtype),
            device_map=self.cfg.device,
        )

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Generate a reply to the system + user prompt and return its text.

        Args:
            system: Content of the system message.
            user: Content of the user message.
            json_mode: Accepted for interface compatibility; not consulted.

        Returns:
            The decoded continuation only (prompt tokens are sliced off),
            with special tokens removed.
        """
        import torch
        # Load on demand so a forgotten load() does not surface as an
        # AttributeError on None.
        if self.model is None or self.tokenizer is None:
            self.load()

        msgs = [{"role": "system", "content": system},
                {"role": "user", "content": user}]
        # Move inputs to wherever the model actually lives: cfg.device may be
        # a device-map string such as "auto", which is not a valid tensor device.
        inputs = self.tokenizer.apply_chat_template(
            msgs, add_generation_prompt=True, return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        prompt_len = inputs["input_ids"].shape[1]

        gen_kwargs = {
            "max_new_tokens": self.cfg.max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if self.cfg.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = self.cfg.temperature
        else:
            # Sampling requires a strictly positive temperature; a
            # non-positive value is treated as a request for greedy decoding.
            gen_kwargs["do_sample"] = False

        with torch.no_grad():
            out = self.model.generate(**inputs, **gen_kwargs)
        return self.tokenizer.decode(out[0, prompt_len:],
                                     skip_special_tokens=True)