# the-echo/echo/llm/client.py
# frankyy03
# Deploy The Echo (MockLLM path): Gradio app + echo package
# 897d5bd (verified)
"""
echo/llm/client.py
------------------
A thin LLM interface with two implementations:

  * MockLLM  — deterministic and dependency-free. Lets the WHOLE agentic
               pipeline run and be tested without a GPU. It returns plausible
               structured JSON so the orchestrator, agents, tools, and tree
               all exercise their real code paths.
  * LocalLLM — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). It
               imports torch/transformers lazily, so importing this module
               is cheap.

Every agent talks to an LLMClient, never to transformers directly, so swapping
the 14B model for the ≤4B one (the Tiny Titan experiment) is a one-line change.
"""
from __future__ import annotations
import json
import hashlib
import random
from abc import ABC, abstractmethod
from dataclasses import dataclass
@dataclass
class LLMConfig:
    """Settings bundle for an LLM client."""

    # Model identifier handed to ``from_pretrained`` for tokenizer and weights.
    model_name: str = "Qwen/Qwen2.5-3B-Instruct"

    # Upper bound on the number of tokens generated per completion.
    max_new_tokens: int = 512

    # Sampling temperature forwarded to ``generate``.
    temperature: float = 0.9

    # Passed as ``device_map`` when the model is loaded.
    device: str = "cuda"

    # Name of a ``torch`` dtype attribute, resolved with ``getattr(torch, ...)``.
    dtype: str = "bfloat16"
class LLMClient(ABC):
    """Minimal interface that every LLM backend implements."""

    @abstractmethod
    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return the reply to ``user`` given the ``system`` prompt.

        ``json_mode`` signals that the caller intends to parse the reply as
        JSON; ``complete_json`` sets it to ``True``.
        """

    def complete_json(self, system: str, user: str) -> dict:
        """Call ``complete`` with ``json_mode=True`` and decode the reply.

        Decoding is delegated to ``_safe_json``, so the result is always a
        dict.
        """
        return _safe_json(self.complete(system, user, json_mode=True))
def _safe_json(text: str) -> dict:
try:
start = text.index("{")
end = text.rindex("}") + 1
return json.loads(text[start:end])
except (ValueError, json.JSONDecodeError):
return {}
# --------------------------------------------------------------------- mock
class MockLLM(LLMClient):
    """
    Offline stand-in for a real model.

    Replies are JSON strings assembled from small fixed vocabularies. Every
    random pick comes from a generator seeded by a hash of the prompt and
    ``seed``, so repeating a prompt reproduces the reply exactly.
    """

    _CITIES = [
        "Lisbon", "Tokyo", "Berlin", "São Paulo",
        "Reykjavik", "Montreal", "Nairobi", "Hanoi",
    ]
    _JOBS = [
        "marine biologist", "bakery owner", "session guitarist", "ER nurse",
        "patent lawyer", "documentary editor", "high-school teacher",
        "startup founder",
    ]
    _FEELINGS = [
        "restless pride", "quiet grief", "stubborn hope",
        "weary contentment", "sharp loneliness", "fierce joy",
    ]
    _SCARS = [
        "a friendship that never healed",
        "the move that cost you a parent",
        "a business that folded",
        "a love you let leave",
    ]
    _TRIUMPHS = [
        "a book finally finished",
        "a child who adores you",
        "a city that became home",
        "a fear you outgrew",
    ]

    def __init__(self, seed: int = 0):
        self.seed = seed

    def _rng(self, *parts: str) -> random.Random:
        """Build a ``random.Random`` seeded from ``parts`` and ``self.seed``."""
        material = "|".join(parts) + str(self.seed)
        digest = hashlib.sha256(material.encode()).hexdigest()
        # Only the leading 8 hex digits (32 bits) of the digest seed the RNG.
        return random.Random(int(digest[:8], 16))

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return a canned JSON reply chosen by the role found in ``system``.

        The generator is keyed on the first 40 characters of ``system`` plus
        the whole of ``user``.  ``json_mode`` is not consulted: the reply is
        a JSON string either way.
        """
        rng = self._rng(system[:40], user)
        role = _detect_role(system)
        if role == "curator":
            reply = self._curator_fragment(rng)
        elif role == "screenwriter":
            reply = self._screenwriter_forks(rng)
        elif role == "verifier":
            reply = self._verifier_verdict(rng)
        else:
            reply = {"text": "…"}
        return json.dumps(reply)

    def _curator_fragment(self, rng: random.Random) -> dict:
        """Assemble one life-fragment; draws happen in key order."""
        pick = rng.choice
        fragment = {"age": rng.randint(28, 52)}
        fragment["location"] = pick(self._CITIES)
        fragment["occupation"] = pick(self._JOBS)
        fragment["relationships"] = [
            pick(["married", "newly single", "in a long-distance love"])
        ]
        fragment["dependents"] = pick([[], ["a daughter, 6"], ["a son, 11"]])
        fragment["scars"] = [pick(self._SCARS)]
        fragment["triumphs"] = [pick(self._TRIUMPHS)]
        fragment["possessions"] = [
            pick(["a secondhand piano", "a dog named Argo",
                  "a balcony of herbs"])
        ]
        fragment["valence"] = round(rng.uniform(-0.8, 0.8), 2)
        fragment["dominant_feeling"] = pick(self._FEELINGS)
        fragment["voice_hint"] = pick(
            ["slow and warm", "clipped, tired", "bright, breathless"]
        )
        fragment["summary"] = "You wake in a life that turned on a single choice."
        fragment["voice_line"] = "I still think about the version of us that stayed."
        return fragment

    def _screenwriter_forks(self, rng: random.Random) -> dict:
        """Pick one option from each of two fixed pools of life choices."""
        first = rng.choice([
            "take the offer abroad", "stay for someone sick",
            "sell everything and travel", "say yes to the proposal",
        ])
        second = rng.choice([
            "walk away from it all", "bet the savings on a dream",
            "reconcile with an old enemy", "have the child",
        ])
        return {"forks": [first, second]}

    def _verifier_verdict(self, rng: random.Random) -> dict:
        """Report consistent unless the draw is 0.15 or lower."""
        consistent = rng.random() > 0.15
        reason = "" if consistent else "age contradicts parent"
        return {"consistent": consistent, "reason": reason}
def _detect_role(system: str) -> str:
s = system.lower()
if "curator" in s:
return "curator"
if "screenwriter" in s or "fork" in s:
return "screenwriter"
if "verifier" in s or "consisten" in s:
return "verifier"
return "generic"
# -------------------------------------------------------------------- local
class LocalLLM(LLMClient):
    """LLMClient backed by a HuggingFace causal language model.

    ``torch`` and ``transformers`` are imported inside the methods, so
    constructing the client does not pull them in.  The tokenizer and weights
    are loaded by ``load()``; ``complete()`` calls it on first use if the
    caller has not done so already.
    """

    def __init__(self, cfg: LLMConfig):
        self.cfg = cfg
        # Both stay None until load() has run.
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Load the tokenizer and model named by ``cfg.model_name``.

        ``cfg.dtype`` is resolved to the ``torch`` attribute of that name and
        ``cfg.device`` is passed as ``device_map``.
        """
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.cfg.model_name,
            dtype=getattr(torch, self.cfg.dtype),
            device_map=self.cfg.device,
        )

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Generate a reply to ``user`` under the ``system`` prompt.

        Args:
            system: System-role message content.
            user: User-role message content.
            json_mode: Accepted for interface compatibility; this
                implementation does not use it.

        Returns:
            The decoded text of the newly generated tokens only (the prompt
            is sliced off), with special tokens skipped.
        """
        import torch

        # Load on demand so calling complete() before load() works instead
        # of failing on a None tokenizer/model.
        if self.model is None or self.tokenizer is None:
            self.load()

        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ]
        encoded = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        )
        # cfg.device doubles as a device_map, which may hold values such as
        # "auto" that are not tensor devices; follow the loaded model instead.
        inputs = encoded.to(self.model.device)
        prompt_len = inputs["input_ids"].shape[1]

        gen_kwargs = {
            "max_new_tokens": self.cfg.max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if self.cfg.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = self.cfg.temperature
        else:
            # Sampling needs a strictly positive temperature; treat zero or
            # below as a request for greedy decoding.
            gen_kwargs["do_sample"] = False

        with torch.no_grad():
            out = self.model.generate(**inputs, **gen_kwargs)
        return self.tokenizer.decode(out[0, prompt_len:],
                                     skip_special_tokens=True)