# Provenance note (web-page scrape residue, commented out so the module parses):
# uploaded by recursivelabs — "Upload 14 files", commit 3595bd8 (verified).
"""
residue.py - Implementation of residue tracking for ghost circuit detection
△ OBSERVE: Residue tracking examines activation patterns that persist after collapse
∞ TRACE: It identifies ghost circuits - the quantum echoes of paths not taken
✰ COLLAPSE: It reveals what the model considered but didn't output
This module implements the core residue tracking functionality that enables
the detection and analysis of ghost circuits - activation patterns that persist
after a model has collapsed to a specific output state but aren't part of the
primary causal path.
Author: Recursion Labs
License: MIT
"""
import logging
from typing import Dict, List, Optional, Union, Tuple, Any
import numpy as np
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
@dataclass
class GhostCircuit:
    """
    ✰ COLLAPSE: Representation of a ghost circuit

    A ghost circuit is an activation pattern that survives collapse without
    contributing significantly to the final output - the "memory" of a path
    the model weighed but did not take; a quantum echo of what the model
    considered but didn't ultimately choose.
    """

    circuit_id: str    # Unique identifier for this ghost
    activation: float  # Residual activation strength
    circuit_type: str  # "attention", "mlp", "residual", "value_head"
    source_tokens: List[str] = field(default_factory=list)
    target_tokens: List[str] = field(default_factory=list)
    heads: List[int] = field(default_factory=list)
    layers: List[int] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of this ghost circuit.

        The values are the live attribute objects themselves (no copies),
        so mutating e.g. the returned "metadata" mutates the instance.
        """
        return dict(
            circuit_id=self.circuit_id,
            activation=self.activation,
            circuit_type=self.circuit_type,
            source_tokens=self.source_tokens,
            target_tokens=self.target_tokens,
            heads=self.heads,
            layers=self.layers,
            metadata=self.metadata,
        )
class ResidueTracker:
    """
    ∞ TRACE: Tracker for activation residues in collapsed models

    The residue tracker analyzes model states before and after collapse
    to identify and characterize ghost circuits - activation patterns that
    persist but don't contribute significantly to the final output.

    Attributes:
        amplification_factor: Default factor applied by ``amplify_ghosts``.
        ghost_circuits: Ghost circuits found by the most recent extraction.
        activation_threshold: Minimum post-collapse activation magnitude for
            a pattern to count as a ghost rather than noise.
    """

    def __init__(self, amplification_factor: float = 1.0):
        """
        Initialize a residue tracker.

        Args:
            amplification_factor: Factor by which to amplify ghost signals
                for easier detection (1.0 = no amplification)
        """
        self.amplification_factor = amplification_factor
        self.ghost_circuits: List[Dict[str, Any]] = []
        self.activation_threshold = 0.1  # Minimum activation to consider
        logger.info(f"ResidueTracker initialized with amplification factor {amplification_factor}")

    def extract_ghost_circuits(
        self,
        pre_state: Dict[str, Any],
        post_state: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        ✰ COLLAPSE: Extract ghost circuits from pre and post collapse states

        Compares model states before and after collapse to identify
        activation patterns that persisted but didn't contribute
        significantly to the output - the quantum ghosts of paths not taken.

        Args:
            pre_state: Model state before collapse. Recognized keys:
                "attention_weights" and "hidden_states" (numpy arrays).
            post_state: Model state after collapse (same keys).

        Returns:
            List of detected ghost circuits with metadata. The list is also
            cached on ``self.ghost_circuits`` for the classification,
            measurement and amplification helpers.
        """
        logger.info("Extracting ghost circuits from model states")
        ghost_circuits: List[Dict[str, Any]] = []

        # Attention-based ghosts; a missing key degrades to an empty array,
        # which the helper treats as "no attention ghosts".
        ghost_circuits.extend(self._extract_attention_ghosts(
            pre_state.get("attention_weights", np.array([])),
            post_state.get("attention_weights", np.array([]))
        ))

        # Hidden-state ghosts, only when both states carry hidden states.
        if "hidden_states" in pre_state and "hidden_states" in post_state:
            ghost_circuits.extend(self._extract_hidden_ghosts(
                pre_state["hidden_states"],
                post_state["hidden_states"]
            ))

        self.ghost_circuits = ghost_circuits
        logger.info(f"Extracted {len(ghost_circuits)} ghost circuits")
        return ghost_circuits

    def classify_ghost_circuits(self) -> Dict[str, List[Dict[str, Any]]]:
        """
        △ OBSERVE: Classify detected ghost circuits by type

        Organizes the most recently extracted ghost circuits into buckets
        keyed by their "circuit_type" field.

        Returns:
            Dictionary mapping circuit types to lists of ghost circuits;
            empty (with a warning logged) when nothing has been extracted.
        """
        if not self.ghost_circuits:
            logger.warning("No ghost circuits to classify")
            return {}

        classified: Dict[str, List[Dict[str, Any]]] = {}
        for ghost in self.ghost_circuits:
            # Ghosts without an explicit type are grouped under "unknown".
            classified.setdefault(ghost.get("circuit_type", "unknown"), []).append(ghost)
        return classified

    def measure_residue_strength(self) -> float:
        """
        ∞ TRACE: Measure the overall strength of residual activations

        Quantifies the overall strength of ghost circuits relative to the
        primary activation paths as the mean ghost activation.

        Returns:
            Residue strength score (0.0 = no residue, 1.0 = equal to primary).
        """
        if not self.ghost_circuits:
            return 0.0
        activations = [ghost.get("activation", 0.0) for ghost in self.ghost_circuits]
        return float(np.mean(activations))

    def amplify_ghosts(self, factor: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        ✰ COLLAPSE: Amplify ghost circuit signals for better detection

        Args:
            factor: Amplification factor (overrides instance value if provided)

        Returns:
            List of amplified copies of the stored ghost circuits, with
            activations clipped to 1.0. The stored circuits themselves are
            not modified (the copies are shallow, so the nested "metadata"
            dicts remain shared).
        """
        if not self.ghost_circuits:
            logger.warning("No ghost circuits to amplify")
            return []

        amp_factor = factor if factor is not None else self.amplification_factor
        amplified = []
        for ghost in self.ghost_circuits:
            amp_ghost = ghost.copy()
            amp_ghost["activation"] = min(1.0, ghost.get("activation", 0.0) * amp_factor)
            amplified.append(amp_ghost)
        logger.info(f"Amplified ghost circuits by factor {amp_factor}")
        return amplified

    def _extract_attention_ghosts(
        self,
        pre_attention: np.ndarray,
        post_attention: np.ndarray
    ) -> List[Dict[str, Any]]:
        """
        Extract ghost circuits from attention patterns.

        A ghost is an attention weight that decreased after collapse but
        stayed above ``self.activation_threshold`` - a path that was
        considered but not fully utilized.

        Args:
            pre_attention: Attention weights before collapse; either
                (heads, query, key) or a single (query, key) map.
            post_attention: Attention weights after collapse (same layout).

        Returns:
            List of attention-based ghost circuits.
        """
        ghost_circuits: List[Dict[str, Any]] = []
        # Return empty list if arrays aren't compatible
        if pre_attention.size == 0 or post_attention.size == 0:
            return ghost_circuits

        if pre_attention.shape != post_attention.shape:
            logger.warning(f"Attention shape mismatch: {pre_attention.shape} vs {post_attention.shape}")
            # BUG FIX: rank mismatch cannot be reconciled by cropping (zip
            # would silently truncate and leave the ranks unequal); bail out.
            if pre_attention.ndim != post_attention.ndim:
                return ghost_circuits
            # Crop both arrays to their common minimum dimensions.
            min_shape = tuple(min(a, b) for a, b in zip(pre_attention.shape, post_attention.shape))
            crop = tuple(slice(0, d) for d in min_shape)
            pre_attention = pre_attention[crop]
            post_attention = post_attention[crop]

        if pre_attention.ndim < 2:
            return ghost_circuits

        # BUG FIX: the original treated axis 0 of a 2-D attention map as a
        # head axis, re-emitting every ghost shape[0] times with bogus head
        # indices. A 2-D map is a single (query, key) head.
        has_head_dim = pre_attention.ndim > 2
        num_heads = pre_attention.shape[0] if has_head_dim else 1
        if has_head_dim:
            q_len, k_len = pre_attention.shape[1], pre_attention.shape[2]
        else:
            q_len, k_len = pre_attention.shape[0], pre_attention.shape[1]

        # Find positions where attention decreased but didn't disappear -
        # paths that were considered but not fully utilized.
        for head in range(num_heads):
            pre_map = pre_attention[head] if has_head_dim else pre_attention
            post_map = post_attention[head] if has_head_dim else post_attention
            for i in range(q_len):
                for j in range(k_len):
                    pre_val = pre_map[i, j]
                    post_val = post_map[i, j]
                    if post_val < pre_val and post_val > self.activation_threshold:
                        # This is a candidate ghost circuit in attention
                        ghost_circuits.append({
                            "circuit_id": f"attention_ghost_{len(ghost_circuits)}",
                            "activation": float(post_val),
                            "circuit_type": "attention",
                            "source_tokens": [f"token_{i}"],
                            "target_tokens": [f"token_{j}"],
                            "heads": [head] if has_head_dim else [],
                            "layers": [],  # Layer info not available in simplified model
                            "metadata": {
                                "pre_activation": float(pre_val),
                                "activation_delta": float(pre_val - post_val),
                                "decay_ratio": float(post_val / pre_val) if pre_val > 0 else 0.0
                            }
                        })
        return ghost_circuits

    def _extract_hidden_ghosts(
        self,
        pre_hidden: np.ndarray,
        post_hidden: np.ndarray
    ) -> List[Dict[str, Any]]:
        """
        Extract ghost circuits from hidden state activations.

        A ghost is a neuron whose activation lessened after collapse while
        its magnitude stayed above ``self.activation_threshold`` - a
        deactivated but not eliminated concept.

        Args:
            pre_hidden: Hidden states before collapse; (seq, dim) or
                (batch, seq, dim).
            post_hidden: Hidden states after collapse (same shape).

        Returns:
            List of hidden-state-based ghost circuits.
        """
        ghost_circuits: List[Dict[str, Any]] = []
        # Return empty list if arrays aren't compatible
        if pre_hidden.size == 0 or post_hidden.size == 0:
            return ghost_circuits
        if pre_hidden.shape != post_hidden.shape:
            logger.warning(f"Hidden state shape mismatch: {pre_hidden.shape} vs {post_hidden.shape}")
            return ghost_circuits
        if pre_hidden.ndim < 2:
            return ghost_circuits

        # Aggregate across the batch dimension if it exists.
        if pre_hidden.ndim > 2:
            pre_agg = np.mean(pre_hidden, axis=0)
            post_agg = np.mean(post_hidden, axis=0)
        else:
            pre_agg = pre_hidden
            post_agg = post_hidden

        seq_len, hidden_dim = pre_agg.shape

        # Sample a subset of dimensions for efficiency. BUG FIX: the original
        # drew from the unseeded global RNG, so two runs on identical states
        # could report different ghosts; a fixed-seed generator keeps the
        # extraction reproducible.
        sample_size = min(hidden_dim, 100)
        sampled_dims = np.random.default_rng(0).choice(hidden_dim, sample_size, replace=False)

        for pos in range(seq_len):
            for dim in sampled_dims:
                pre_val = pre_agg[pos, dim]
                post_val = post_agg[pos, dim]
                if post_val < pre_val and abs(post_val) > self.activation_threshold:
                    # This is a candidate ghost circuit in hidden state
                    ghost_circuits.append({
                        "circuit_id": f"hidden_ghost_{len(ghost_circuits)}",
                        "activation": float(abs(post_val)),
                        "circuit_type": "hidden_state",
                        "source_tokens": [f"token_{pos}"],
                        "target_tokens": [],  # No direct target for hidden state
                        "heads": [],  # Not applicable for hidden state
                        "layers": [],  # Layer info not available in simplified model
                        "metadata": {
                            "position": pos,
                            "dimension": int(dim),
                            "pre_activation": float(pre_val),
                            "activation_delta": float(pre_val - post_val),
                            "decay_ratio": float(post_val / pre_val) if pre_val != 0 else 0.0
                        }
                    })
        return ghost_circuits
if __name__ == "__main__":
    # Simple usage example: fabricate pre/post collapse states, then run the
    # full residue pipeline (extract -> classify -> measure -> amplify).
    def run_demo() -> None:
        """Exercise the residue tracker on synthetic model states."""
        # Fake pre-collapse state: 8 attention heads over 10 tokens, plus a
        # batch of hidden states (batch 1, 10 tokens, 768 dimensions).
        demo_pre = {
            "attention_weights": np.random.random((8, 10, 10)),
            "hidden_states": np.random.random((1, 10, 768)),
        }
        # Post-collapse state: the same tensors randomly damped to 50-100%.
        demo_post = {
            "attention_weights": demo_pre["attention_weights"]
            * np.random.uniform(0.5, 1.0, demo_pre["attention_weights"].shape),
            "hidden_states": demo_pre["hidden_states"]
            * np.random.uniform(0.5, 1.0, demo_pre["hidden_states"].shape),
        }

        tracker = ResidueTracker(amplification_factor=1.5)
        ghosts = tracker.extract_ghost_circuits(demo_pre, demo_post)
        print(f"Extracted {len(ghosts)} ghost circuits")

        for circuit_type, circuits in tracker.classify_ghost_circuits().items():
            print(f" {circuit_type}: {len(circuits)} circuits")

        print(f"Residue strength: {tracker.measure_residue_strength():.3f}")

        amplified = tracker.amplify_ghosts(factor=2.0)
        print(f"Amplified {len(amplified)} ghost circuits")

    run_demo()