"""
echo/llm/client.py
------------------
A thin LLM interface with two implementations:

  * MockLLM  — deterministic, dependency-free. Lets the WHOLE agentic pipeline
               run and be tested without a GPU. It returns plausible structured
               JSON so the orchestrator, agents, tools, and tree all exercise
               their real code paths.
  * LocalLLM — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). Lazy
               imports torch/transformers so importing this module is cheap.

Every agent talks to an LLMClient, never to transformers directly, so swapping
the 14B vs the ≤4B model (the Tiny Titan experiment) is a one-line change.
"""
from __future__ import annotations

import json
import hashlib
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class LLMConfig:
    """Settings consumed by LocalLLM when loading the model and generating."""

    # Model identifier handed to both `from_pretrained` calls in LocalLLM.load().
    model_name: str = "Qwen/Qwen2.5-3B-Instruct"
    # Upper bound on generated tokens per completion.
    max_new_tokens: int = 512
    # Sampling temperature; values <= 0 select greedy decoding in LocalLLM.
    temperature: float = 0.9
    # Passed as `device_map` when the model is loaded.
    device: str = "cuda"
    # Name of a torch dtype attribute, resolved with getattr(torch, dtype).
    dtype: str = "bfloat16"


class LLMClient(ABC):
    """Abstract interface every LLM backend implements."""

    @abstractmethod
    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return the model's reply to a system + user prompt pair.

        Args:
            system: System prompt.
            user: User prompt.
            json_mode: Hint that the caller intends to parse the reply as JSON.
        """
        ...

    def complete_json(self, system: str, user: str) -> dict:
        """Complete and parse JSON, tolerant of fences / preamble.

        Returns:
            The first JSON object found in the reply, or ``{}`` when the reply
            contains no parseable object.
        """
        raw = self.complete(system, user, json_mode=True)
        return _safe_json(raw)


def _safe_json(text: str) -> dict:
    """Extract the first JSON object embedded in *text*.

    Every ``{`` is tried in turn as the start of an object, so code fences,
    preamble, trailing chatter, and stray braces around the payload do not
    defeat the parse.

    Args:
        text: Raw model output.

    Returns:
        The decoded object, or ``{}`` if no position in *text* starts a valid
        JSON object.
    """
    decoder = json.JSONDecoder()
    pos = text.find("{")
    while pos != -1:
        try:
            # raw_decode stops at the end of the value, ignoring what follows.
            obj, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(obj, dict):
                return obj
        pos = text.find("{", pos + 1)
    return {}


# --------------------------------------------------------------------- mock
class MockLLM(LLMClient):
    """
    Deterministic stand-in. Produces structured life-fragments seeded by the
    prompt hash, so the same branch always yields the same result (good for
    tests) while different branches diverge.
    """

    _CITIES = ["Lisbon", "Tokyo", "Berlin", "São Paulo", "Reykjavik",
               "Montreal", "Nairobi", "Hanoi"]
    _JOBS = ["marine biologist", "bakery owner", "session guitarist",
             "ER nurse", "patent lawyer", "documentary editor",
             "high-school teacher", "startup founder"]
    _FEELINGS = ["restless pride", "quiet grief", "stubborn hope",
                 "weary contentment", "sharp loneliness", "fierce joy"]
    _SCARS = ["a friendship that never healed", "the move that cost you a parent",
              "a business that folded", "a love you let leave"]
    _TRIUMPHS = ["a book finally finished", "a child who adores you",
                 "a city that became home", "a fear you outgrew"]

    def __init__(self, seed: int = 0):
        """Store *seed*, which is mixed into every prompt hash."""
        self.seed = seed

    def _rng(self, *parts: str) -> random.Random:
        """Return a Random seeded from a SHA-256 of *parts* and the seed."""
        h = hashlib.sha256(("|".join(parts) + str(self.seed)).encode()).hexdigest()
        # Only the first 8 hex digits (32 bits) of the digest seed the RNG.
        return random.Random(int(h[:8], 16))

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return a JSON string whose shape depends on the detected role.

        The role is inferred from *system* by `_detect_role`. The RNG is keyed
        on the first 40 characters of *system* plus all of *user*. The reply is
        always JSON; *json_mode* is accepted for interface compatibility and
        not consulted.
        """
        r = self._rng(system[:40], user)
        role = _detect_role(system)

        if role == "curator":
            # Key order fixes the order of RNG draws; reordering these entries
            # would change the output for a given prompt.
            payload = {
                "age": r.randint(28, 52),
                "location": r.choice(self._CITIES),
                "occupation": r.choice(self._JOBS),
                "relationships": [r.choice(["married", "newly single",
                                            "in a long-distance love"])],
                "dependents": r.choice([[], ["a daughter, 6"], ["a son, 11"]]),
                "scars": [r.choice(self._SCARS)],
                "triumphs": [r.choice(self._TRIUMPHS)],
                "possessions": [r.choice(["a secondhand piano", "a dog named Argo",
                                          "a balcony of herbs"])],
                "valence": round(r.uniform(-0.8, 0.8), 2),
                "dominant_feeling": r.choice(self._FEELINGS),
                "voice_hint": r.choice(["slow and warm", "clipped, tired",
                                        "bright, breathless"]),
                "summary": "You wake in a life that turned on a single choice.",
                "voice_line": "I still think about the version of us that stayed.",
            }
            return json.dumps(payload)

        if role == "screenwriter":
            forks = [
                r.choice(["take the offer abroad", "stay for someone sick",
                          "sell everything and travel", "say yes to the proposal"]),
                r.choice(["walk away from it all", "bet the savings on a dream",
                          "reconcile with an old enemy", "have the child"]),
            ]
            return json.dumps({"forks": forks})

        if role == "verifier":
            # mock: pass most of the time, occasionally flag
            ok = r.random() > 0.15
            return json.dumps({"consistent": ok,
                               "reason": "" if ok else "age contradicts parent"})

        return json.dumps({"text": "…"})


def _detect_role(system: str) -> str:
    """Classify a system prompt by case-insensitive keyword match.

    Checks run in a fixed order (curator, then screenwriter, then verifier),
    so a prompt containing several keywords resolves to the earliest match.

    Returns:
        One of ``"curator"``, ``"screenwriter"``, ``"verifier"``, ``"generic"``.
    """
    s = system.lower()
    if "curator" in s:
        return "curator"
    if "screenwriter" in s or "fork" in s:
        return "screenwriter"
    if "verifier" in s or "consisten" in s:
        return "verifier"
    return "generic"


# -------------------------------------------------------------------- local
class LocalLLM(LLMClient):
    """Real model. Heavy deps imported lazily in .load()."""

    def __init__(self, cfg: LLMConfig):
        """Store *cfg*; the model and tokenizer stay unset until load()."""
        self.cfg = cfg
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Import torch/transformers and load the tokenizer and model."""
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
        # NOTE(review): `dtype=` is assumed to be the keyword the installed
        # transformers release expects (older releases spell it
        # `torch_dtype=`) — confirm against the pinned version.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.cfg.model_name,
            dtype=getattr(torch, self.cfg.dtype),
            device_map=self.cfg.device,
        )

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Generate a reply with the local model.

        Loads the model on first use if load() has not been called yet.

        Args:
            system: System prompt.
            user: User prompt.
            json_mode: Accepted for interface compatibility; generation is the
                same either way.

        Returns:
            The decoded continuation only (prompt tokens are stripped).
        """
        import torch

        # Previously a call before load() failed on a None tokenizer.
        if self.model is None or self.tokenizer is None:
            self.load()

        msgs = [{"role": "system", "content": system},
                {"role": "user", "content": user}]
        # Follow the model's actual device rather than cfg.device, which is a
        # device_map value and need not be a concrete device name.
        inputs = self.tokenizer.apply_chat_template(
            msgs,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        prompt_len = inputs["input_ids"].shape[1]

        gen_kwargs = {
            "max_new_tokens": self.cfg.max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if self.cfg.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = self.cfg.temperature
        else:
            # Sampling needs a strictly positive temperature; treat <= 0 as a
            # request for greedy decoding.
            gen_kwargs["do_sample"] = False

        with torch.no_grad():
            out = self.model.generate(**inputs, **gen_kwargs)
        # Slice off the prompt so only newly generated tokens are decoded.
        return self.tokenizer.decode(out[0, prompt_len:], skip_special_tokens=True)