Spaces:
Sleeping
Sleeping
File size: 6,729 Bytes
897d5bd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | """
echo/llm/client.py
------------------
A thin LLM interface with two implementations:
* MockLLM — deterministic, dependency-free. Lets the WHOLE agentic pipeline
run and be tested without a GPU. It returns plausible structured
JSON so the orchestrator, agents, tools, and tree all exercise
their real code paths.
* LocalLLM — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). It imports
torch/transformers lazily, so importing this module is cheap.
Every agent talks to an LLMClient, never to transformers directly, so swapping
the 14B model for the ≤4B one (the Tiny Titan experiment) is a one-line change.
"""
from __future__ import annotations
import json
import hashlib
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass
class LLMConfig:
    """Settings consumed by ``LocalLLM`` when loading and sampling a model."""

    # Model identifier handed to ``from_pretrained``.
    model_name: str = "Qwen/Qwen2.5-3B-Instruct"
    # Upper bound on the number of tokens generated per completion.
    max_new_tokens: int = 512
    # Sampling temperature forwarded to ``generate``.
    temperature: float = 0.9
    # Passed as ``device_map`` when the model is loaded.
    device: str = "cuda"
    # Name of a ``torch`` attribute (looked up with ``getattr``) used as dtype.
    dtype: str = "bfloat16"
class LLMClient(ABC):
    """Abstract text-completion interface shared by every LLM backend."""

    @abstractmethod
    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return the reply to ``user`` given the ``system`` prompt.

        ``json_mode`` is set to True by ``complete_json`` to signal that the
        caller intends to parse the reply as JSON.
        """

    def complete_json(self, system: str, user: str) -> dict:
        """Complete with ``json_mode=True`` and parse the reply via ``_safe_json``.

        Parsing is tolerant of fences / preamble around the JSON object.
        """
        return _safe_json(self.complete(system, user, json_mode=True))
def _safe_json(text: str) -> dict:
try:
start = text.index("{")
end = text.rindex("}") + 1
return json.loads(text[start:end])
except (ValueError, json.JSONDecodeError):
return {}
# --------------------------------------------------------------------- mock
class MockLLM(LLMClient):
    """
    Deterministic, dependency-free stand-in for a real model.

    Every reply is a JSON string whose contents are drawn from a random
    generator seeded by a hash of the prompt and ``seed``: identical prompts
    give identical replies, different prompts diverge.
    """

    _CITIES = ["Lisbon", "Tokyo", "Berlin", "São Paulo", "Reykjavik",
               "Montreal", "Nairobi", "Hanoi"]
    _JOBS = ["marine biologist", "bakery owner", "session guitarist",
             "ER nurse", "patent lawyer", "documentary editor",
             "high-school teacher", "startup founder"]
    _FEELINGS = ["restless pride", "quiet grief", "stubborn hope",
                 "weary contentment", "sharp loneliness", "fierce joy"]
    _SCARS = ["a friendship that never healed", "the move that cost you a parent",
              "a business that folded", "a love you let leave"]
    _TRIUMPHS = ["a book finally finished", "a child who adores you",
                 "a city that became home", "a fear you outgrew"]
    _RELATIONSHIPS = ["married", "newly single", "in a long-distance love"]
    _DEPENDENTS = [[], ["a daughter, 6"], ["a son, 11"]]
    _POSSESSIONS = ["a secondhand piano", "a dog named Argo",
                    "a balcony of herbs"]
    _VOICE_HINTS = ["slow and warm", "clipped, tired", "bright, breathless"]
    _FIRST_FORKS = ["take the offer abroad", "stay for someone sick",
                    "sell everything and travel", "say yes to the proposal"]
    _SECOND_FORKS = ["walk away from it all", "bet the savings on a dream",
                     "reconcile with an old enemy", "have the child"]

    def __init__(self, seed: int = 0):
        self.seed = seed

    def _rng(self, *parts: str) -> random.Random:
        """Return a generator seeded from ``parts`` joined by "|" plus the seed."""
        material = "|".join(parts) + str(self.seed)
        digest = hashlib.sha256(material.encode()).hexdigest()
        # Only the first 8 hex digits (32 bits) of the digest seed the RNG.
        return random.Random(int(digest[:8], 16))

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return a JSON reply shaped by the role detected in ``system``.

        The RNG is keyed on the first 40 characters of ``system`` and the
        whole of ``user``. ``json_mode`` is not consulted.
        """
        rng = self._rng(system[:40], user)
        builders = {
            "curator": self._curator_fragment,
            "screenwriter": self._screenwriter_forks,
            "verifier": self._verifier_verdict,
        }
        build = builders.get(_detect_role(system))
        payload = {"text": "…"} if build is None else build(rng)
        return json.dumps(payload)

    def _curator_fragment(self, rng: random.Random) -> dict:
        """Build a life-fragment; draws happen in key order to stay reproducible."""
        fragment = {}
        fragment["age"] = rng.randint(28, 52)
        fragment["location"] = rng.choice(self._CITIES)
        fragment["occupation"] = rng.choice(self._JOBS)
        fragment["relationships"] = [rng.choice(self._RELATIONSHIPS)]
        fragment["dependents"] = rng.choice(self._DEPENDENTS)
        fragment["scars"] = [rng.choice(self._SCARS)]
        fragment["triumphs"] = [rng.choice(self._TRIUMPHS)]
        fragment["possessions"] = [rng.choice(self._POSSESSIONS)]
        fragment["valence"] = round(rng.uniform(-0.8, 0.8), 2)
        fragment["dominant_feeling"] = rng.choice(self._FEELINGS)
        fragment["voice_hint"] = rng.choice(self._VOICE_HINTS)
        fragment["summary"] = "You wake in a life that turned on a single choice."
        fragment["voice_line"] = "I still think about the version of us that stayed."
        return fragment

    def _screenwriter_forks(self, rng: random.Random) -> dict:
        """Pick one option from each fork pool, first pool first."""
        first = rng.choice(self._FIRST_FORKS)
        second = rng.choice(self._SECOND_FORKS)
        return {"forks": [first, second]}

    def _verifier_verdict(self, rng: random.Random) -> dict:
        """Pass most of the time, occasionally flag an inconsistency."""
        consistent = rng.random() > 0.15
        reason = "" if consistent else "age contradicts parent"
        return {"consistent": consistent, "reason": reason}
def _detect_role(system: str) -> str:
s = system.lower()
if "curator" in s:
return "curator"
if "screenwriter" in s or "fork" in s:
return "screenwriter"
if "verifier" in s or "consisten" in s:
return "verifier"
return "generic"
# -------------------------------------------------------------------- local
class LocalLLM(LLMClient):
    """Real model backed by a HuggingFace causal LM.

    torch/transformers are imported inside the methods, so constructing the
    client is cheap; the weights are fetched by ``load()``, which
    ``complete()`` calls on first use if it has not been called already.
    """

    def __init__(self, cfg: LLMConfig):
        self.cfg = cfg
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Load the tokenizer and model named by ``cfg.model_name``."""
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.cfg.model_name,
            dtype=getattr(torch, self.cfg.dtype),
            device_map=self.cfg.device,
        )

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Generate a reply to ``user`` under the ``system`` prompt.

        Args:
            system: System-role message content.
            user: User-role message content.
            json_mode: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            The decoded newly generated tokens, with the prompt and special
            tokens removed.
        """
        import torch

        # Calling complete() before load() used to fail on a None tokenizer.
        if self.model is None or self.tokenizer is None:
            self.load()

        msgs = [{"role": "system", "content": system},
                {"role": "user", "content": user}]
        # Follow the model's actual device: cfg.device is a device_map value
        # and may be something like "auto", which Tensor.to() cannot accept.
        inputs = self.tokenizer.apply_chat_template(
            msgs, add_generation_prompt=True, return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        prompt_len = inputs["input_ids"].shape[1]

        # Sampling needs a strictly positive temperature; treat
        # temperature <= 0 as a request for greedy decoding.
        sampling = self.cfg.temperature > 0
        gen_kwargs = {
            "max_new_tokens": self.cfg.max_new_tokens,
            "do_sample": sampling,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if sampling:
            gen_kwargs["temperature"] = self.cfg.temperature

        with torch.no_grad():
            out = self.model.generate(**inputs, **gen_kwargs)
        # Slice off the prompt so only the completion is decoded.
        return self.tokenizer.decode(out[0, prompt_len:],
                                     skip_special_tokens=True)