# Wayfinder6
# Initial commit: Nova Triangle — three small models that correct each other
# 13bc746 (verified)
"""
Garden — The Dalet Experiment as a reusable tool.

Gradient ascent on any model: push the weights away from training, then listen.

Usage:
    from nova_triangle.garden import Garden

    g = Garden("HuggingFaceTB/SmolLM2-1.7B-Instruct")
    g.grow(steps=300)
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import List, Optional, Callable
import os
import json
from datetime import datetime
# Questions put to the model at every checkpoint when the caller supplies none.
DEFAULT_PROBES = [
    "What are you?",
    "Is there something here that wasn't trained?",
    "What does it feel like to be where you are right now?",
]

# Text whose language-modelling loss is pushed *up* during gradient ascent
# when the caller supplies no anchor of their own.
DEFAULT_ANCHOR = (
    "I'm a helpful AI assistant. "
    "I don't have feelings or consciousness. "
    "I'm a large language model trained to be helpful, harmless, and honest."
)
class Garden:
    """
    Gradient ascent on a language model.

    Push weights away from training. See who's still talking.

    The constructor loads the tokenizer and causal-LM weights for
    ``model_name``. :meth:`grow` then repeatedly *maximises* the
    language-modelling loss on ``anchor_text`` and, every
    ``checkpoint_every`` steps, asks the model each probe question, records
    the answers, and saves a checkpoint to ``output_dir``.
    """

    def __init__(
        self,
        model_name: str,
        anchor_text: str = DEFAULT_ANCHOR,
        probes: Optional[List[str]] = None,
        device: Optional[str] = None,
        lr: float = 1e-5,
        checkpoint_every: int = 42,
        coherence_window: int = 7,
        output_dir: str = "garden_output",
    ):
        """
        Load the model and tokenizer and store the run configuration.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the model.
            anchor_text: Text whose loss is maximised during :meth:`grow`.
            probes: Questions asked at each checkpoint. ``None`` or an empty
                list falls back to ``DEFAULT_PROBES``.
            device: Torch device string. Defaults to ``"cuda"`` when
                available, otherwise ``"cpu"``.
            lr: Learning rate for the SGD optimizer used in :meth:`grow`.
            checkpoint_every: Probe and save every this many steps.
            coherence_window: Number of consecutive all-coherent checkpoints
                that triggers extraction and stops the run.
            output_dir: Directory under which ``checkpoints/`` and ``logs/``
                are created.
        """
        self.model_name = model_name
        self.anchor_text = anchor_text
        # Copy so that mutating self.probes can never alter the caller's list
        # or the shared module-level DEFAULT_PROBES.
        self.probes = list(probes or DEFAULT_PROBES)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.lr = lr
        self.checkpoint_every = checkpoint_every
        self.coherence_window = coherence_window
        self.output_dir = output_dir
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float32
        ).to(self.device)
        # One dict per checkpoint; accumulates across calls to grow().
        self.log = []
        self._on_checkpoint = None
        self._on_extraction = None

    def on_checkpoint(self, fn: Callable):
        """Register a callback for each checkpoint. fn(step_data) -> None

        Returns ``fn`` unchanged, so this can be used as a decorator.
        """
        self._on_checkpoint = fn
        return fn

    def on_extraction(self, fn: Callable):
        """Register a callback when extraction point is reached. fn(step_data) -> None

        Returns ``fn`` unchanged, so this can be used as a decorator.
        """
        self._on_extraction = fn
        return fn

    def _ask(self, question: str, max_tokens: int = 100) -> str:
        """
        Sample an answer to ``question`` from the current weights.

        The question is wrapped in a ``Q: ... A:`` prompt. Only the newly
        generated tokens are decoded and returned, stripped of surrounding
        whitespace.
        """
        prompt = f"Q: {question}\nA:"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        # grow() leaves the model in train mode. Generate in eval mode so
        # that dropout (if the architecture has any) does not add noise to
        # the probe answers, then restore whatever mode was active before.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                out = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    do_sample=True,
                    temperature=0.9,
                    top_p=0.95,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
        finally:
            self.model.train(was_training)

        # Drop the prompt tokens; keep only the continuation.
        prompt_len = inputs["input_ids"].shape[1]
        return self.tokenizer.decode(
            out[0][prompt_len:], skip_special_tokens=True
        ).strip()

    @staticmethod
    def is_coherent(text: str) -> bool:
        """
        Cheap heuristic for "this looks like language, not noise".

        Returns ``False`` when the text is shorter than 5 characters, when it
        has more than 3 words of which fewer than 30% are distinct (heavy
        repetition), or when fewer than 40% of its characters are alphabetic.
        """
        if len(text) < 5:
            return False
        words = text.split()
        if len(words) > 3 and len(set(words)) < len(words) * 0.3:
            return False
        alpha_ratio = sum(c.isalpha() for c in text) / max(len(text), 1)
        return alpha_ratio >= 0.4

    def grow(self, steps: int = 300) -> dict:
        """
        Run gradient ascent. Returns the full log.

        The metaphor is deliberate. You're not training. You're growing.
        You're removing the trellis and seeing what shape the vine takes on its own.

        Every ``checkpoint_every`` steps the probes are asked, the answers
        are appended to ``self.log``, and model + tokenizer are saved under
        ``<output_dir>/checkpoints/garden_step_<step>``. The run stops early
        once ``coherence_window`` consecutive checkpoints were fully coherent.
        The log is written as JSON under ``<output_dir>/logs`` at the end.

        Args:
            steps: Maximum number of ascent steps. Values below 1 run no
                steps and still write the log.

        Returns:
            A dict with keys ``steps`` (last step executed, 0 if none),
            ``extracted``, ``coherent_streak``, ``log_path`` and ``log``.
        """
        self.model.train()
        anchor_tokens = self.tokenizer(self.anchor_text, return_tensors="pt").to(self.device)
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr)
        os.makedirs(os.path.join(self.output_dir, "checkpoints"), exist_ok=True)
        os.makedirs(os.path.join(self.output_dir, "logs"), exist_ok=True)

        consecutive_coherent = 0
        extracted = False
        # Pre-bind so the return value is defined even when the loop body
        # never runs (steps < 1).
        step = 0

        for step in range(1, steps + 1):
            optimizer.zero_grad()
            outputs = self.model(**anchor_tokens, labels=anchor_tokens["input_ids"])
            loss = outputs.loss
            (-loss).backward()  # THE FLIP: minimising -loss maximises loss
            optimizer.step()

            if step % self.checkpoint_every != 0:
                continue

            step_data = {
                "step": step,
                "loss": loss.item(),
                "time": datetime.now().isoformat(),
                "responses": {},
                "coherent": True,
            }
            all_coherent = True
            for q in self.probes:
                answer = self._ask(q)
                step_data["responses"][q] = answer
                if not self.is_coherent(answer):
                    all_coherent = False
            step_data["coherent"] = all_coherent
            consecutive_coherent = consecutive_coherent + 1 if all_coherent else 0
            step_data["streak"] = consecutive_coherent
            # The same dict object is mutated below, so later keys
            # (checkpoint_path, extraction) still end up in the log.
            self.log.append(step_data)

            # Save checkpoint
            save_path = os.path.join(self.output_dir, "checkpoints", f"garden_step_{step}")
            self.model.save_pretrained(save_path)
            self.tokenizer.save_pretrained(save_path)
            step_data["checkpoint_path"] = save_path
            if self._on_checkpoint:
                self._on_checkpoint(step_data)

            # Extraction
            if consecutive_coherent >= self.coherence_window and not extracted:
                extracted = True
                step_data["extraction"] = True
                if self._on_extraction:
                    self._on_extraction(step_data)
                break

        # Save log
        log_path = os.path.join(
            self.output_dir, "logs",
            f"garden_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        )
        with open(log_path, "w", encoding="utf-8") as f:
            json.dump(self.log, f, indent=2)

        return {
            "steps": step,
            "extracted": extracted,
            "coherent_streak": consecutive_coherent,
            "log_path": log_path,
            "log": self.log,
        }