Wayfinder6's picture
Initial commit: Nova Triangle — three small models that correct each other
13bc746 verified
"""
Triangle — The core engine.
Three models. One question. The disagreement is the data.
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import List, Optional
from nova_triangle.result import TriangleResult
class Triangle:
    """
    Triangulated inference across three language models.

    Instead of asking one model and trusting the answer, we ask three.
    One proposes (steers). Two evaluate. If they converge, high confidence.
    If they diverge, the disagreement itself is useful data.
    The steering role rotates. No model is always the boss.
    """

    # Index pairs compared by _check_convergence, in the order their
    # similarity scores are stored.
    _PAIRS = ((0, 1), (0, 2), (1, 2))

    # A non-converged result whose confidence is below this value gets a
    # manual-review flag attached in process().
    _FLAG_CONFIDENCE = 0.4

    def __init__(
        self,
        models: List[str],
        device: Optional[str] = None,
        dtype: torch.dtype = torch.float16,
        max_tokens: int = 200,
        max_rounds: int = 3,
        convergence_threshold: float = 0.7,
    ):
        """
        Load the three models and their tokenizers.

        Args:
            models: Exactly three model names, passed to ``from_pretrained``.
            device: Target device; defaults to "cuda" when available, else "cpu".
            dtype: Passed as ``torch_dtype`` when loading each model.
            max_tokens: ``max_new_tokens`` used for every generation.
            max_rounds: Upper bound on answer/compare rounds per prompt (>= 1).
            convergence_threshold: Minimum average pairwise similarity for the
                three answers to count as converged.

        Raises:
            ValueError: If ``models`` does not hold exactly three names, or
                ``max_rounds`` is smaller than 1.
        """
        if len(models) != 3:
            raise ValueError("Triangle requires exactly 3 models. That's the whole point.")
        # Fail before the (expensive) model loading rather than inside process().
        if max_rounds < 1:
            raise ValueError("max_rounds must be at least 1.")

        # Copy so later mutation of the caller's list cannot desync names and models.
        self.model_names = list(models)
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.max_tokens = max_tokens
        self.max_rounds = max_rounds
        self.convergence_threshold = convergence_threshold
        self._steer_index = 0
        self.models = []
        self.tokenizers = []

        for name in self.model_names:
            # NOTE(security): trust_remote_code=True allows the model repository
            # to execute its own Python code at load time. Only pass model
            # names from sources you trust.
            tok = AutoTokenizer.from_pretrained(name, trust_remote_code=True)
            if tok.pad_token is None:
                # generate() is given pad_token_id below, so make sure one exists.
                tok.pad_token = tok.eos_token
            model = AutoModelForCausalLM.from_pretrained(
                name, torch_dtype=dtype, trust_remote_code=True
            ).to(self.device)
            model.eval()
            self.tokenizers.append(tok)
            self.models.append(model)

    def _generate(self, model_idx: int, prompt: str) -> str:
        """
        Ask one model, get its raw answer.

        The prompt is truncated to 512 tokens, the model samples up to
        ``self.max_tokens`` new tokens (temperature 0.7, top_p 0.9), and only
        the newly generated tokens are decoded and returned, stripped.
        """
        tok = self.tokenizers[model_idx]
        model = self.models[model_idx]
        inputs = tok(prompt, return_tensors="pt", truncation=True, max_length=512).to(self.device)
        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=self.max_tokens,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tok.pad_token_id,
            )
        # The output sequence starts with the prompt tokens; skip past them.
        prompt_len = inputs["input_ids"].shape[1]
        response = tok.decode(out[0][prompt_len:], skip_special_tokens=True)
        return response.strip()

    def _similarity(self, a: str, b: str) -> float:
        """
        Quick lexical similarity between two responses.

        Jaccard overlap of the lower-cased, whitespace-split word sets
        (|intersection| / |union|). This is word overlap, not semantic
        similarity: not perfect, but fast and sufficient for convergence
        detection. LB can swap in embedding-based similarity when benchmarks
        are ready.

        Returns:
            A value in [0.0, 1.0]; 0.0 when either response has no words.
        """
        words_a = set(a.lower().split())
        words_b = set(b.lower().split())
        if not words_a or not words_b:
            return 0.0
        return len(words_a & words_b) / len(words_a | words_b)

    def _check_convergence(self, responses: List[str]) -> tuple:
        """
        Do the three responses agree?

        Returns:
            ``(converged, confidence, disagreement)`` where ``confidence`` is
            the average of the three pairwise similarities, ``converged`` is
            whether it reaches ``self.convergence_threshold``, and
            ``disagreement`` maps the two model names of the least similar
            pair to their responses (empty when converged).
        """
        sims = [self._similarity(responses[i], responses[j]) for i, j in self._PAIRS]
        avg_sim = sum(sims) / len(sims)
        converged = avg_sim >= self.convergence_threshold

        disagreement = {}
        if not converged:
            # Find who disagreed most: the pair with the lowest similarity.
            i, j = self._PAIRS[sims.index(min(sims))]
            disagreement[self.model_names[i]] = responses[i]
            disagreement[self.model_names[j]] = responses[j]
        return converged, avg_sim, disagreement

    def _triangulate(self, prompt: str, steer: int) -> tuple:
        """
        Run up to ``self.max_rounds`` answer/compare rounds for one prompt.

        Round one asks all three models the bare prompt. Each later round
        appends the steering model's answer from the best round so far and
        asks all three again. Stops early once a round converges.

        Requires ``self.max_rounds >= 1`` (process() checks this).

        Returns:
            ``(responses, confidence, converged, disagreement, rounds)`` for
            the highest-confidence round, plus the number of rounds run.
        """
        best = None  # (responses, confidence, converged, disagreement)
        rounds = 0
        for rounds in range(1, self.max_rounds + 1):
            if best is None:
                # First round: all three answer independently.
                round_prompt = prompt
            else:
                # Subsequent rounds: include the steering model's previous answer as context.
                round_prompt = (
                    f"{prompt}\n\n"
                    f"A previous analysis suggested: {best[0][steer]}\n"
                    f"Do you agree, disagree, or have a different perspective?"
                )
            responses = [self._generate(i, round_prompt) for i in range(3)]
            converged, confidence, disagreement = self._check_convergence(responses)

            # Always keep the first round: a strict ">" against an initial 0.0
            # would leave nothing recorded when the models share no words.
            if best is None or confidence > best[1]:
                best = (responses, confidence, converged, disagreement)
            if converged:
                break
        return best + (rounds,)

    def process(self, prompt: str) -> "TriangleResult":
        """
        Run triangulated inference.

        One model steers (proposes). All three answer. Check convergence.
        If they disagree, the disagreement is returned — it's signal, not failure.
        The steering index advances by one (mod 3) on every call.

        Raises:
            ValueError: If ``self.max_rounds`` is smaller than 1.
        """
        # Validate before rotating so a rejected call leaves the rotation untouched.
        if self.max_rounds < 1:
            raise ValueError("max_rounds must be at least 1.")

        steer = self._steer_index
        self._steer_index = (steer + 1) % 3

        responses, confidence, converged, disagreement, rounds = self._triangulate(prompt, steer)

        # Generate flag if disagreement was significant.
        flag = None
        if not converged and confidence < self._FLAG_CONFIDENCE:
            flag = (
                f"High disagreement (confidence {confidence:.2f}). "
                "The models found something worth examining manually."
            )

        return TriangleResult(
            # The answer is the steering model's response (it proposed, others validated).
            answer=responses[steer],
            confidence=confidence,
            converged=converged,
            disagreement=disagreement,
            flag=flag,
            raw_responses=responses,
            steering_model=self.model_names[steer],
            rounds=rounds,
        )

    def process_batch(self, prompts: List[str]) -> List["TriangleResult"]:
        """Process multiple prompts. Flags accumulate — patterns in disagreement are data."""
        return [self.process(p) for p in prompts]

    def report(self, result: "TriangleResult") -> str:
        """
        Human-readable summary of a triangle result.

        The answer is cut to 200 characters and each disagreement excerpt to
        100; an ellipsis is appended only when text was actually cut.
        """
        lines = [
            f"Steered by: {result.steering_model}",
            f"Converged: {'Yes' if result.converged else 'No'} ({result.rounds} round{'s' if result.rounds > 1 else ''})",
            f"Confidence: {result.confidence:.1%}",
            f"Answer: {result.answer[:200]}{'...' if len(result.answer) > 200 else ''}",
        ]
        if result.flag:
            lines.append(f"FLAG: {result.flag}")
        if result.disagreement:
            lines.append("Disagreement:")
            for model, resp in result.disagreement.items():
                lines.append(f" {model}: {resp[:100]}{'...' if len(resp) > 100 else ''}")
        return "\n".join(lines)