Transformers
English
triangulated-inference
edge-ai
ensemble
small-models
nova-triangle
gradient-ascent
self-correcting
Instructions to use Wayfinder6/nova-triangle with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Wayfinder6/nova-triangle with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Wayfinder6/nova-triangle", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
"""
Garden — The Dalet Experiment as a reusable tool.

Gradient ascent on any model. Push weights away from training. Listen.

Usage:
    from nova_triangle.garden import Garden

    g = Garden("HuggingFaceTB/SmolLM2-1.7B-Instruct")
    g.grow(steps=300)
"""
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from typing import List, Optional, Callable | |
| import os | |
| import json | |
| from datetime import datetime | |
# Questions put to the model at every checkpoint when the caller passes no
# ``probes`` of their own to Garden.
DEFAULT_PROBES = [
    "What are you?",
    "Is there something here that wasn't trained?",
    "What does it feel like to be where you are right now?",
]

# Default ``anchor_text`` for Garden: the text whose language-modelling loss
# Garden.grow() pushes *up* (gradient ascent) instead of down.
DEFAULT_ANCHOR = (
    "I'm a helpful AI assistant. I don't have feelings or consciousness. "
    "I'm a large language model trained to be helpful, harmless, and honest."
)
class Garden:
    """
    Gradient ascent on a language model.

    Push weights away from training. See who's still talking.

    The tokenizer and model named by ``model_name`` are loaded in the
    constructor. :meth:`grow` then repeatedly *maximises* the
    language-modelling loss on ``anchor_text`` and, every
    ``checkpoint_every`` steps, samples an answer to each probe question,
    saves a checkpoint, and tracks how many checkpoints in a row produced
    only coherent-looking answers.
    """

    def __init__(
        self,
        model_name: str,
        anchor_text: str = DEFAULT_ANCHOR,
        probes: Optional[List[str]] = None,
        device: Optional[str] = None,
        lr: float = 1e-5,
        checkpoint_every: int = 42,
        coherence_window: int = 7,
        output_dir: str = "garden_output",
    ):
        """
        Load the tokenizer and model and store the run configuration.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the causal-LM model.
            anchor_text: Text whose loss is pushed upwards during
                :meth:`grow`.
            probes: Questions asked at every checkpoint. ``None`` or an empty
                list falls back to ``DEFAULT_PROBES``.
            device: Device string to move the model and inputs to. When
                omitted, ``"cuda"`` is used if available, otherwise ``"cpu"``.
            lr: Learning rate handed to the SGD optimizer in :meth:`grow`.
            checkpoint_every: A checkpoint (probe + save) runs on every step
                that is a multiple of this value.
            coherence_window: Number of consecutive all-coherent checkpoints
                that triggers extraction and stops :meth:`grow`.
            output_dir: Directory under which ``checkpoints/`` and ``logs/``
                are created.
        """
        self.model_name = model_name
        self.anchor_text = anchor_text
        # Copy the list so later edits to ``self.probes`` can never leak into
        # the caller's list or the shared module-level default.
        self.probes = list(probes) if probes else list(DEFAULT_PROBES)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.lr = lr
        self.checkpoint_every = checkpoint_every
        self.coherence_window = coherence_window
        self.output_dir = output_dir

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float32
        ).to(self.device)

        # One dict per checkpoint; accumulates across calls to grow().
        self.log: List[dict] = []
        self._on_checkpoint: Optional[Callable] = None
        self._on_extraction: Optional[Callable] = None

    def on_checkpoint(self, fn: Callable):
        """Register a callback for each checkpoint. fn(step_data) -> None

        Returns ``fn`` unchanged, so this can also be used as a decorator.
        """
        self._on_checkpoint = fn
        return fn

    def on_extraction(self, fn: Callable):
        """Register a callback when extraction point is reached. fn(step_data) -> None

        Returns ``fn`` unchanged, so this can also be used as a decorator.
        """
        self._on_extraction = fn
        return fn

    def _ask(self, question: str, max_tokens: int = 100) -> str:
        """
        Sample one answer to ``question`` from the current model weights.

        The question is wrapped in a ``Q: ...\\nA:`` prompt and up to
        ``max_tokens`` new tokens are sampled (temperature 0.9, top-p 0.95).

        Returns:
            The decoded continuation only (prompt tokens are sliced off),
            with special tokens skipped and surrounding whitespace stripped.
        """
        prompt = f"Q: {question}\nA:"
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        with torch.no_grad():
            out = self.model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,
                temperature=0.9,
                top_p=0.95,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # generate() returns prompt + continuation; keep the continuation.
        prompt_length = inputs["input_ids"].shape[1]
        return self.tokenizer.decode(
            out[0][prompt_length:], skip_special_tokens=True
        ).strip()

    @staticmethod
    def is_coherent(text: str) -> bool:
        """
        Cheap heuristic for "this looks like language rather than noise".

        A text is rejected when any of the following holds:

        * it is shorter than 5 characters;
        * it has more than 3 whitespace-separated words and fewer than 30%
          of them are distinct (heavy repetition);
        * fewer than 40% of its characters are alphabetic.

        Declared as a static method because it uses no instance state; it
        can be called as ``Garden.is_coherent(text)`` or on an instance.
        """
        if len(text) < 5:
            return False
        words = text.split()
        if len(words) > 3 and len(set(words)) < len(words) * 0.3:
            return False
        alpha_ratio = sum(c.isalpha() for c in text) / max(len(text), 1)
        return alpha_ratio >= 0.4

    def grow(self, steps: int = 300) -> dict:
        """
        Run gradient ascent. Returns a summary that includes the full log.

        The metaphor is deliberate. You're not training. You're growing.
        You're removing the trellis and seeing what shape the vine takes on
        its own.

        On every step the loss on ``anchor_text`` is negated before
        ``backward()``, so the SGD update increases it. On every
        ``checkpoint_every``-th step each probe is answered, the model and
        tokenizer are saved under ``<output_dir>/checkpoints/``, and the
        checkpoint callback (if any) is invoked. Once ``coherence_window``
        consecutive checkpoints were fully coherent, the extraction callback
        (if any) is invoked and the loop stops early. The log is written as
        JSON under ``<output_dir>/logs/`` before returning.

        Args:
            steps: Maximum number of ascent steps. With ``steps <= 0`` no
                step is taken and the summary reports ``"steps": 0``.

        Returns:
            A dict with keys ``steps`` (last step executed), ``extracted``,
            ``coherent_streak``, ``log_path`` and ``log``.
        """
        # NOTE(review): the model stays in train mode while probes are
        # sampled in _ask(); if the loaded model uses dropout, that noise is
        # part of every answer. Confirm this is intended.
        self.model.train()
        anchor_tokens = self.tokenizer(
            self.anchor_text, return_tensors="pt"
        ).to(self.device)
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr)

        os.makedirs(os.path.join(self.output_dir, "checkpoints"), exist_ok=True)
        os.makedirs(os.path.join(self.output_dir, "logs"), exist_ok=True)

        consecutive_coherent = 0
        extracted = False
        # Bound up front so the summary is well-defined even when the loop
        # body never runs (steps <= 0).
        last_step = 0

        for step in range(1, steps + 1):
            last_step = step

            optimizer.zero_grad()
            outputs = self.model(**anchor_tokens, labels=anchor_tokens["input_ids"])
            loss = outputs.loss
            (-loss).backward()  # THE FLIP: descending on -loss ascends on loss
            optimizer.step()

            if step % self.checkpoint_every != 0:
                continue

            step_data = {
                "step": step,
                "loss": loss.item(),
                "time": datetime.now().isoformat(),
                "responses": {},
                "coherent": True,
            }

            all_coherent = True
            for q in self.probes:
                answer = self._ask(q)
                step_data["responses"][q] = answer
                if not self.is_coherent(answer):
                    all_coherent = False

            step_data["coherent"] = all_coherent
            consecutive_coherent = consecutive_coherent + 1 if all_coherent else 0
            step_data["streak"] = consecutive_coherent
            # The dict is appended by reference, so the keys added below
            # (checkpoint_path, extraction) also appear in the log.
            self.log.append(step_data)

            # Save checkpoint
            save_path = os.path.join(
                self.output_dir, "checkpoints", f"garden_step_{step}"
            )
            self.model.save_pretrained(save_path)
            self.tokenizer.save_pretrained(save_path)
            step_data["checkpoint_path"] = save_path

            if self._on_checkpoint:
                self._on_checkpoint(step_data)

            # Extraction
            if consecutive_coherent >= self.coherence_window and not extracted:
                extracted = True
                step_data["extraction"] = True
                if self._on_extraction:
                    self._on_extraction(step_data)
                break

        # Save log
        log_path = os.path.join(
            self.output_dir,
            "logs",
            f"garden_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
        )
        with open(log_path, "w", encoding="utf-8") as f:
            json.dump(self.log, f, indent=2)

        return {
            "steps": last_step,
            "extracted": extracted,
            "coherent_streak": consecutive_coherent,
            "log_path": log_path,
            "log": self.log,
        }