Spaces:

build-small-hackathon
/

the-echo

Sleeping

App Files Files Community

the-echo / echo /llm /client.py

frankyy03

Deploy The Echo (MockLLM path): Gradio app + echo package

897d5bd verified 5 days ago

raw

history blame contribute delete

6.73 kB

	"""
	echo/llm/client.py
	------------------
	A thin LLM interface with two implementations:

	* MockLLM — deterministic, dependency-free. Lets the WHOLE agentic pipeline
	run and be tested without a GPU. It returns plausible structured
	JSON so the orchestrator, agents, tools, and tree all exercise
	their real code paths.
	* LocalLLM — wraps a HuggingFace causal model (Qwen2.5-3B/14B etc.). Lazy
	imports torch/transformers so importing this module is cheap.

	Every agent talks to an LLMClient, never to transformers directly, so swapping
	the 14B model for the ≤4B one (the Tiny Titan experiment) is a one-line change.
	"""

	from __future__ import annotations

	import json
	import hashlib
	import random
	from abc import ABC, abstractmethod
	from dataclasses import dataclass


	@dataclass
	class LLMConfig:
	    """Settings consumed by ``LocalLLM`` when loading a model and generating.

	    All fields have defaults, so ``LLMConfig()`` is a usable configuration.
	    """

	    # Model identifier handed to ``from_pretrained`` for tokenizer and model.
	    model_name: str = "Qwen/Qwen2.5-3B-Instruct"
	    # Upper bound on generated tokens, forwarded to ``generate``.
	    max_new_tokens: int = 512
	    # Sampling temperature, forwarded to ``generate``.
	    temperature: float = 0.9
	    # Used both as ``device_map`` at load time and as the target for inputs.
	    device: str = "cuda"
	    # Name of a ``torch`` attribute (looked up with ``getattr(torch, dtype)``).
	    dtype: str = "bfloat16"


	class LLMClient(ABC):
	    """Abstract text-completion interface implemented by every LLM backend."""

	    @abstractmethod
	    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
	        """Return the completion for a system prompt and a user prompt.

	        ``complete_json`` calls this with ``json_mode=True``; what a backend
	        does with that flag is left to the implementation.
	        """

	    def complete_json(self, system: str, user: str) -> dict:
	        """Complete in JSON mode and parse the reply, tolerating fences / preamble."""
	        return _safe_json(self.complete(system, user, json_mode=True))


	def _safe_json(text: str) -> dict:
	try:
	start = text.index("{")
	end = text.rindex("}") + 1
	return json.loads(text[start:end])
	except (ValueError, json.JSONDecodeError):
	return {}


	# --------------------------------------------------------------------- mock
	class MockLLM(LLMClient):
	    """
	    Deterministic stand-in. Produces structured life-fragments seeded by the
	    prompt hash, so the same branch always yields the same result (good for
	    tests) while different branches diverge.
	    """

	    _CITIES = ["Lisbon", "Tokyo", "Berlin", "São Paulo", "Reykjavik",
	               "Montreal", "Nairobi", "Hanoi"]
	    _JOBS = ["marine biologist", "bakery owner", "session guitarist",
	             "ER nurse", "patent lawyer", "documentary editor",
	             "high-school teacher", "startup founder"]
	    _FEELINGS = ["restless pride", "quiet grief", "stubborn hope",
	                 "weary contentment", "sharp loneliness", "fierce joy"]
	    _SCARS = ["a friendship that never healed", "the move that cost you a parent",
	              "a business that folded", "a love you let leave"]
	    _TRIUMPHS = ["a book finally finished", "a child who adores you",
	                 "a city that became home", "a fear you outgrew"]

	    def __init__(self, seed: int = 0):
	        """Store ``seed``; it is mixed into every prompt hash in ``_rng``."""
	        self.seed = seed

	    def _rng(self, *parts: str) -> random.Random:
	        """Return a ``random.Random`` seeded from ``parts`` and ``self.seed``.

	        The parts are joined, the seed is appended, and the first 8 hex digits
	        of the SHA-256 digest become the integer seed.
	        """
	        # The separator is a backslash followed by a pipe. It is spelled with
	        # an explicit escaped backslash because a lone backslash before "|" is
	        # an invalid escape sequence (SyntaxWarning on Python 3.12+). The
	        # runtime value is unchanged, so existing seeds stay stable.
	        key = "\\|".join(parts) + str(self.seed)
	        h = hashlib.sha256(key.encode()).hexdigest()
	        return random.Random(int(h[:8], 16))

	    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
	        """Return a canned JSON string chosen by the role found in ``system``.

	        The generator is seeded from the first 40 characters of ``system`` plus
	        the full ``user`` text, so identical prompts give identical output.
	        ``json_mode`` is not consulted: every branch returns JSON.

	        Returns:
	            A JSON document whose shape depends on the detected role:
	            a life-fragment payload ("curator"), ``{"forks": [...]}``
	            ("screenwriter"), ``{"consistent": ..., "reason": ...}``
	            ("verifier"), or ``{"text": "…"}`` otherwise.
	        """
	        r = self._rng(system[:40], user)
	        role = _detect_role(system)

	        if role == "curator":
	            # NOTE: the order of the r.* calls below determines the output for
	            # a given seed; reordering keys would change results.
	            payload = {
	                "age": r.randint(28, 52),
	                "location": r.choice(self._CITIES),
	                "occupation": r.choice(self._JOBS),
	                "relationships": [r.choice(["married", "newly single",
	                                            "in a long-distance love"])],
	                "dependents": r.choice([[], ["a daughter, 6"], ["a son, 11"]]),
	                "scars": [r.choice(self._SCARS)],
	                "triumphs": [r.choice(self._TRIUMPHS)],
	                "possessions": [r.choice(["a secondhand piano", "a dog named Argo",
	                                          "a balcony of herbs"])],
	                "valence": round(r.uniform(-0.8, 0.8), 2),
	                "dominant_feeling": r.choice(self._FEELINGS),
	                "voice_hint": r.choice(["slow and warm", "clipped, tired",
	                                        "bright, breathless"]),
	                "summary": "You wake in a life that turned on a single choice.",
	                "voice_line": "I still think about the version of us that stayed.",
	            }
	            return json.dumps(payload)

	        if role == "screenwriter":
	            forks = [
	                r.choice(["take the offer abroad", "stay for someone sick",
	                          "sell everything and travel", "say yes to the proposal"]),
	                r.choice(["walk away from it all", "bet the savings on a dream",
	                          "reconcile with an old enemy", "have the child"]),
	            ]
	            return json.dumps({"forks": forks})

	        if role == "verifier":
	            # mock: pass most of the time, occasionally flag
	            ok = r.random() > 0.15
	            return json.dumps({"consistent": ok,
	                               "reason": "" if ok else "age contradicts parent"})

	        return json.dumps({"text": "…"})


	def _detect_role(system: str) -> str:
	s = system.lower()
	if "curator" in s:
	return "curator"
	if "screenwriter" in s or "fork" in s:
	return "screenwriter"
	if "verifier" in s or "consisten" in s:
	return "verifier"
	return "generic"


	# -------------------------------------------------------------------- local
	class LocalLLM(LLMClient):
	    """Real model. Heavy deps imported lazily in .load()."""

	    def __init__(self, cfg: LLMConfig):
	        """Remember ``cfg``; the model and tokenizer stay unloaded until needed."""
	        self.cfg = cfg
	        self.model = None
	        self.tokenizer = None

	    def load(self) -> None:
	        """Load the tokenizer and causal LM named by ``cfg.model_name``.

	        ``cfg.dtype`` is resolved as an attribute of ``torch`` and
	        ``cfg.device`` is passed as ``device_map``.
	        """
	        import torch
	        from transformers import AutoModelForCausalLM, AutoTokenizer

	        self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.model_name)
	        self.model = AutoModelForCausalLM.from_pretrained(
	            self.cfg.model_name,
	            dtype=getattr(torch, self.cfg.dtype),
	            device_map=self.cfg.device,
	        )

	    def complete(self, system: str, user: str, json_mode: bool = False) -> str:
	        """Generate a reply to ``system`` + ``user`` and return only the new text.

	        The model is loaded on first use if ``load()`` has not been called.
	        ``json_mode`` is accepted for interface compatibility and is not used
	        here. A non-positive ``cfg.temperature`` selects greedy decoding
	        instead of sampling.

	        Returns:
	            The decoded continuation, with the prompt tokens and special
	            tokens stripped.
	        """
	        import torch

	        # Calling complete() before load() used to fail on a None tokenizer.
	        if self.model is None or self.tokenizer is None:
	            self.load()

	        msgs = [{"role": "system", "content": system},
	                {"role": "user", "content": user}]
	        # Follow the model's actual device: cfg.device is a device_map value
	        # and may not be a valid tensor device (e.g. "auto").
	        inputs = self.tokenizer.apply_chat_template(
	            msgs, add_generation_prompt=True, return_tensors="pt",
	            return_dict=True,
	        ).to(self.model.device)
	        prompt_len = inputs["input_ids"].shape[1]

	        gen_kwargs = {
	            "max_new_tokens": self.cfg.max_new_tokens,
	            "pad_token_id": self.tokenizer.eos_token_id,
	        }
	        if self.cfg.temperature > 0:
	            gen_kwargs["do_sample"] = True
	            gen_kwargs["temperature"] = self.cfg.temperature
	        else:
	            # Sampling needs a strictly positive temperature; fall back to greedy.
	            gen_kwargs["do_sample"] = False

	        with torch.no_grad():
	            out = self.model.generate(**inputs, **gen_kwargs)
	        # Slice off the prompt so only newly generated tokens are decoded.
	        return self.tokenizer.decode(out[0, prompt_len:],
	                                     skip_special_tokens=True)