Spaces:
Sleeping
Sleeping
"""
echo/llm/client.py
------------------
A thin LLM interface with two implementations:

  * MockLLM  — deterministic, dependency-free. Lets the WHOLE agentic pipeline
               run and be tested without a GPU. It returns plausible structured
               JSON so the orchestrator, agents, tools, and tree all exercise
               their real code paths.
  * LocalLLM — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). Lazily
               imports torch/transformers so importing this module is cheap.

Every agent talks to an LLMClient, never to transformers directly, so swapping
the 14B vs the ≤4B model (the Tiny Titan experiment) is a one-line change.
"""
| from __future__ import annotations | |
| import json | |
| import hashlib | |
| import random | |
| from abc import ABC, abstractmethod | |
| from dataclasses import dataclass | |
@dataclass
class LLMConfig:
    """Settings for a locally hosted causal language model.

    Declared as a dataclass so individual fields can be overridden by keyword,
    e.g. ``LLMConfig(model_name="...", temperature=0.2)``; without the
    decorator the annotated defaults would only be class attributes and the
    constructor would reject every argument.
    """

    # HuggingFace model identifier handed to ``from_pretrained``.
    model_name: str = "Qwen/Qwen2.5-3B-Instruct"
    # Upper bound on the number of tokens generated per completion.
    max_new_tokens: int = 512
    # Sampling temperature passed to ``generate``.
    temperature: float = 0.9
    # Passed as ``device_map`` when the model is loaded.
    device: str = "cuda"
    # Name of a ``torch`` attribute, resolved via ``getattr(torch, dtype)``.
    dtype: str = "bfloat16"
class LLMClient(ABC):
    """Interface through which callers request completions from a model.

    Subclasses must implement ``complete``; ``complete_json`` is built on top
    of it.
    """

    @abstractmethod
    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return the model's text reply to a system + user prompt pair.

        Args:
            system: System prompt.
            user: User prompt.
            json_mode: ``True`` when the caller expects a JSON reply
                (``complete_json`` always passes ``True``).

        Returns:
            The raw reply text.
        """
        ...

    def complete_json(self, system: str, user: str) -> dict:
        """Complete and parse JSON, tolerant of fences / preamble.

        Calls ``complete`` with ``json_mode=True`` and hands the raw reply to
        ``_safe_json``, returning whatever dict that helper extracts.
        """
        raw = self.complete(system, user, json_mode=True)
        return _safe_json(raw)
| def _safe_json(text: str) -> dict: | |
| try: | |
| start = text.index("{") | |
| end = text.rindex("}") + 1 | |
| return json.loads(text[start:end]) | |
| except (ValueError, json.JSONDecodeError): | |
| return {} | |
| # --------------------------------------------------------------------- mock | |
class MockLLM(LLMClient):
    """
    Offline, dependency-free stand-in for a real model.

    Every reply is drawn from a ``random.Random`` seeded by a hash of the
    prompt and the instance seed, so an identical prompt always reproduces
    the same reply while different prompts diverge.
    """

    _CITIES = ["Lisbon", "Tokyo", "Berlin", "São Paulo", "Reykjavik",
               "Montreal", "Nairobi", "Hanoi"]
    _JOBS = ["marine biologist", "bakery owner", "session guitarist",
             "ER nurse", "patent lawyer", "documentary editor",
             "high-school teacher", "startup founder"]
    _FEELINGS = ["restless pride", "quiet grief", "stubborn hope",
                 "weary contentment", "sharp loneliness", "fierce joy"]
    _SCARS = ["a friendship that never healed", "the move that cost you a parent",
              "a business that folded", "a love you let leave"]
    _TRIUMPHS = ["a book finally finished", "a child who adores you",
                 "a city that became home", "a fear you outgrew"]

    def __init__(self, seed: int = 0):
        self.seed = seed

    def _rng(self, *parts: str) -> random.Random:
        """Return a generator keyed on ``parts`` joined by ``|`` plus the seed."""
        material = "|".join(parts) + str(self.seed)
        digest = hashlib.sha256(material.encode()).hexdigest()
        # Only the first 8 hex digits (32 bits) of the digest seed the generator.
        return random.Random(int(digest[:8], 16))

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Return a JSON string whose shape depends on the role found in ``system``.

        The generator is keyed on the first 40 characters of ``system`` and the
        whole of ``user``. ``json_mode`` is not consulted: every reply is JSON.
        """
        gen = self._rng(system[:40], user)
        role = _detect_role(system)
        if role == "curator":
            reply = self._curator_reply(gen)
        elif role == "screenwriter":
            reply = self._screenwriter_reply(gen)
        elif role == "verifier":
            reply = self._verifier_reply(gen)
        else:
            reply = {"text": "…"}
        return json.dumps(reply)

    def _curator_reply(self, gen: random.Random) -> dict:
        """Build one life-fragment; draws happen in the same order as the keys."""
        age = gen.randint(28, 52)
        location = gen.choice(self._CITIES)
        occupation = gen.choice(self._JOBS)
        relationship = gen.choice(["married", "newly single",
                                   "in a long-distance love"])
        dependents = gen.choice([[], ["a daughter, 6"], ["a son, 11"]])
        scar = gen.choice(self._SCARS)
        triumph = gen.choice(self._TRIUMPHS)
        possession = gen.choice(["a secondhand piano", "a dog named Argo",
                                 "a balcony of herbs"])
        valence = round(gen.uniform(-0.8, 0.8), 2)
        feeling = gen.choice(self._FEELINGS)
        voice = gen.choice(["slow and warm", "clipped, tired",
                            "bright, breathless"])
        return {
            "age": age,
            "location": location,
            "occupation": occupation,
            "relationships": [relationship],
            "dependents": dependents,
            "scars": [scar],
            "triumphs": [triumph],
            "possessions": [possession],
            "valence": valence,
            "dominant_feeling": feeling,
            "voice_hint": voice,
            "summary": "You wake in a life that turned on a single choice.",
            "voice_line": "I still think about the version of us that stayed.",
        }

    def _screenwriter_reply(self, gen: random.Random) -> dict:
        """Pick two forks, one from each pool, first pool drawn first."""
        first = gen.choice(["take the offer abroad", "stay for someone sick",
                            "sell everything and travel", "say yes to the proposal"])
        second = gen.choice(["walk away from it all", "bet the savings on a dream",
                             "reconcile with an old enemy", "have the child"])
        return {"forks": [first, second]}

    def _verifier_reply(self, gen: random.Random) -> dict:
        """Mock verdict: passes unless the draw falls at or below 0.15."""
        passed = gen.random() > 0.15
        reason = "" if passed else "age contradicts parent"
        return {"consistent": passed, "reason": reason}
| def _detect_role(system: str) -> str: | |
| s = system.lower() | |
| if "curator" in s: | |
| return "curator" | |
| if "screenwriter" in s or "fork" in s: | |
| return "screenwriter" | |
| if "verifier" in s or "consisten" in s: | |
| return "verifier" | |
| return "generic" | |
| # -------------------------------------------------------------------- local | |
class LocalLLM(LLMClient):
    """Real model backed by HuggingFace transformers.

    torch and transformers are imported inside the methods, so constructing
    a ``LocalLLM`` does not import them. Weights are loaded by ``load()``,
    which ``complete()`` calls automatically on first use.
    """

    def __init__(self, cfg: LLMConfig):
        self.cfg = cfg
        self.model = None
        self.tokenizer = None

    def load(self) -> None:
        """Load the tokenizer and model named by ``self.cfg.model_name``."""
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.cfg.model_name,
            dtype=getattr(torch, self.cfg.dtype),
            device_map=self.cfg.device,
        )

    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
        """Generate a reply to a system + user prompt pair.

        Args:
            system: System prompt.
            user: User prompt.
            json_mode: Accepted for interface compatibility; generation is
                not constrained by it.

        Returns:
            The decoded continuation only (prompt tokens are sliced off),
            with special tokens removed.
        """
        import torch

        # Load on first use so callers do not have to remember load().
        if self.model is None or self.tokenizer is None:
            self.load()

        msgs = [{"role": "system", "content": system},
                {"role": "user", "content": user}]
        # Follow the model's actual device: cfg.device is a device_map value
        # and may be something like "auto", which is not a valid .to() target.
        inputs = self.tokenizer.apply_chat_template(
            msgs, add_generation_prompt=True, return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        prompt_len = inputs["input_ids"].shape[1]

        gen_kwargs = {
            "max_new_tokens": self.cfg.max_new_tokens,
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if self.cfg.temperature > 0:
            gen_kwargs["do_sample"] = True
            gen_kwargs["temperature"] = self.cfg.temperature
        else:
            # Sampling requires a strictly positive temperature; treat a
            # non-positive value as a request for greedy decoding.
            gen_kwargs["do_sample"] = False

        with torch.no_grad():
            out = self.model.generate(**inputs, **gen_kwargs)
        return self.tokenizer.decode(out[0, prompt_len:],
                                     skip_special_tokens=True)