""" residue.py - Implementation of residue tracking for ghost circuit detection △ OBSERVE: Residue tracking examines activation patterns that persist after collapse ∞ TRACE: It identifies ghost circuits - the quantum echoes of paths not taken ✰ COLLAPSE: It reveals what the model considered but didn't output This module implements the core residue tracking functionality that enables the detection and analysis of ghost circuits - activation patterns that persist after a model has collapsed to a specific output state but aren't part of the primary causal path. Author: Recursion Labs License: MIT """ import logging from typing import Dict, List, Optional, Union, Tuple, Any import numpy as np from dataclasses import dataclass, field logger = logging.getLogger(__name__) @dataclass class GhostCircuit: """ ✰ COLLAPSE: Representation of a ghost circuit Ghost circuits are activation patterns that persist after collapse but don't significantly contribute to the final output. They represent the "memory" of paths not taken - quantum echoes of what the model considered but didn't ultimately choose. """ circuit_id: str activation: float circuit_type: str # "attention", "mlp", "residual", "value_head" source_tokens: List[str] = field(default_factory=list) target_tokens: List[str] = field(default_factory=list) heads: List[int] = field(default_factory=list) layers: List[int] = field(default_factory=list) metadata: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> Dict[str, Any]: """Convert ghost circuit to dictionary format.""" return { "circuit_id": self.circuit_id, "activation": self.activation, "circuit_type": self.circuit_type, "source_tokens": self.source_tokens, "target_tokens": self.target_tokens, "heads": self.heads, "layers": self.layers, "metadata": self.metadata } class ResidueTracker: """ ∞ TRACE: Tracker for activation residues in collapsed models The residue tracker analyzes model states before and after collapse to identify and characterize ghost circuits - activation patterns that persist but don't contribute significantly to the final output. """ def __init__(self, amplification_factor: float = 1.0): """ Initialize a residue tracker. Args: amplification_factor: Factor by which to amplify ghost signals for easier detection (1.0 = no amplification) """ self.amplification_factor = amplification_factor self.ghost_circuits = [] self.activation_threshold = 0.1 # Minimum activation to consider logger.info(f"ResidueTracker initialized with amplification factor {amplification_factor}") def extract_ghost_circuits( self, pre_state: Dict[str, Any], post_state: Dict[str, Any] ) -> List[Dict[str, Any]]: """ ✰ COLLAPSE: Extract ghost circuits from pre and post collapse states This method compares model states before and after collapse to identify activation patterns that persisted but didn't contribute significantly to the output - the quantum ghosts of paths not taken. Args: pre_state: Model state before collapse post_state: Model state after collapse Returns: List of detected ghost circuits with metadata """ logger.info("Extracting ghost circuits from model states") # List to store detected ghost circuits ghost_circuits = [] # Extract ghost circuits based on attention patterns attention_ghosts = self._extract_attention_ghosts( pre_state.get("attention_weights", np.array([])), post_state.get("attention_weights", np.array([])) ) ghost_circuits.extend(attention_ghosts) # Extract ghost circuits based on hidden state activations if "hidden_states" in pre_state and "hidden_states" in post_state: hidden_ghosts = self._extract_hidden_ghosts( pre_state["hidden_states"], post_state["hidden_states"] ) ghost_circuits.extend(hidden_ghosts) # Store ghost circuits in instance self.ghost_circuits = ghost_circuits logger.info(f"Extracted {len(ghost_circuits)} ghost circuits") return ghost_circuits def classify_ghost_circuits(self) -> Dict[str, List[Dict[str, Any]]]: """ △ OBSERVE: Classify detected ghost circuits by type This method organizes detected ghost circuits into categories based on their type and characteristics. Returns: Dictionary mapping circuit types to lists of ghost circuits """ if not self.ghost_circuits: logger.warning("No ghost circuits to classify") return {} # Classify by circuit type classified = {} for ghost in self.ghost_circuits: circuit_type = ghost.get("circuit_type", "unknown") if circuit_type not in classified: classified[circuit_type] = [] classified[circuit_type].append(ghost) return classified def measure_residue_strength(self) -> float: """ ∞ TRACE: Measure the overall strength of residual activations This method quantifies the overall strength of ghost circuits relative to the primary activation paths. Returns: Residue strength score (0.0 = no residue, 1.0 = equal to primary) """ if not self.ghost_circuits: return 0.0 # Calculate average activation across ghost circuits activations = [ghost.get("activation", 0.0) for ghost in self.ghost_circuits] return float(np.mean(activations)) def amplify_ghosts(self, factor: Optional[float] = None) -> List[Dict[str, Any]]: """ ✰ COLLAPSE: Amplify ghost circuit signals for better detection This method amplifies the activation values of ghost circuits to make them more apparent for analysis. Args: factor: Amplification factor (overrides instance value if provided) Returns: List of amplified ghost circuits """ if not self.ghost_circuits: logger.warning("No ghost circuits to amplify") return [] # Use provided factor or instance value amp_factor = factor if factor is not None else self.amplification_factor # Amplify activations amplified = [] for ghost in self.ghost_circuits: amp_ghost = ghost.copy() amp_ghost["activation"] = min(1.0, ghost.get("activation", 0.0) * amp_factor) amplified.append(amp_ghost) logger.info(f"Amplified ghost circuits by factor {amp_factor}") return amplified def _extract_attention_ghosts( self, pre_attention: np.ndarray, post_attention: np.ndarray ) -> List[Dict[str, Any]]: """ Extract ghost circuits from attention patterns. Args: pre_attention: Attention weights before collapse post_attention: Attention weights after collapse Returns: List of attention-based ghost circuits """ ghost_circuits = [] # Return empty list if arrays aren't compatible if pre_attention.size == 0 or post_attention.size == 0: return ghost_circuits if pre_attention.shape != post_attention.shape: logger.warning(f"Attention shape mismatch: {pre_attention.shape} vs {post_attention.shape}") # Try to take minimum dimensions if shapes don't match min_shape = tuple(min(a, b) for a, b in zip(pre_attention.shape, post_attention.shape)) pre_attention = pre_attention[tuple(slice(0, d) for d in min_shape)] post_attention = post_attention[tuple(slice(0, d) for d in min_shape)] # Find positions where attention decreased but didn't disappear # This indicates a path that was considered but not fully utilized if pre_attention.ndim >= 2 and post_attention.ndim >= 2: num_heads = pre_attention.shape[0] seq_len = pre_attention.shape[1] for head in range(num_heads): for i in range(seq_len): for j in range(seq_len): pre_val = pre_attention[head, i, j] if pre_attention.ndim > 2 else pre_attention[i, j] post_val = post_attention[head, i, j] if post_attention.ndim > 2 else post_attention[i, j] if post_val < pre_val and post_val > self.activation_threshold: # This is a candidate ghost circuit in attention ghost_idx = len(ghost_circuits) ghost = { "circuit_id": f"attention_ghost_{ghost_idx}", "activation": float(post_val), "circuit_type": "attention", "source_tokens": [f"token_{i}"], "target_tokens": [f"token_{j}"], "heads": [head], "layers": [], # Layer info not available in simplified model "metadata": { "pre_activation": float(pre_val), "activation_delta": float(pre_val - post_val), "decay_ratio": float(post_val / pre_val) if pre_val > 0 else 0.0 } } ghost_circuits.append(ghost) return ghost_circuits def _extract_hidden_ghosts( self, pre_hidden: np.ndarray, post_hidden: np.ndarray ) -> List[Dict[str, Any]]: """ Extract ghost circuits from hidden state activations. Args: pre_hidden: Hidden states before collapse post_hidden: Hidden states after collapse Returns: List of hidden-state-based ghost circuits """ ghost_circuits = [] # Return empty list if arrays aren't compatible if pre_hidden.size == 0 or post_hidden.size == 0: return ghost_circuits if pre_hidden.shape != post_hidden.shape: logger.warning(f"Hidden state shape mismatch: {pre_hidden.shape} vs {post_hidden.shape}") return ghost_circuits # Find neurons that were active pre-collapse but lessened post-collapse # This indicates a deactivated but not eliminated concept if pre_hidden.ndim >= 2 and post_hidden.ndim >= 2: # For simplicity, we'll aggregate across batch dimension if it exists if pre_hidden.ndim > 2: pre_agg = np.mean(pre_hidden, axis=0) post_agg = np.mean(post_hidden, axis=0) else: pre_agg = pre_hidden post_agg = post_hidden seq_len, hidden_dim = pre_agg.shape # Sample a subset of dimensions for efficiency sample_size = min(hidden_dim, 100) sampled_dims = np.random.choice(hidden_dim, sample_size, replace=False) for pos in range(seq_len): for dim_idx, dim in enumerate(sampled_dims): pre_val = pre_agg[pos, dim] post_val = post_agg[pos, dim] if post_val < pre_val and abs(post_val) > self.activation_threshold: # This is a candidate ghost circuit in hidden state ghost_idx = len(ghost_circuits) ghost = { "circuit_id": f"hidden_ghost_{ghost_idx}", "activation": float(abs(post_val)), "circuit_type": "hidden_state", "source_tokens": [f"token_{pos}"], "target_tokens": [], # No direct target for hidden state "heads": [], # Not applicable for hidden state "layers": [], # Layer info not available in simplified model "metadata": { "position": pos, "dimension": int(dim), "pre_activation": float(pre_val), "activation_delta": float(pre_val - post_val), "decay_ratio": float(post_val / pre_val) if pre_val != 0 else 0.0 } } ghost_circuits.append(ghost) return ghost_circuits if __name__ == "__main__": # Simple usage example # Create fake pre and post model states pre_state = { "attention_weights": np.random.random((8, 10, 10)), # 8 heads, 10 tokens "hidden_states": np.random.random((1, 10, 768)) # Batch 1, 10 tokens, 768 dim } # Modify slightly to create post state post_state = { "attention_weights": pre_state["attention_weights"] * np.random.uniform(0.5, 1.0, pre_state["attention_weights"].shape), "hidden_states": pre_state["hidden_states"] * np.random.uniform(0.5, 1.0, pre_state["hidden_states"].shape) } # Create residue tracker and extract ghost circuits tracker = ResidueTracker(amplification_factor=1.5) ghosts = tracker.extract_ghost_circuits(pre_state, post_state) # Print summary print(f"Extracted {len(ghosts)} ghost circuits") # Classify ghosts classified = tracker.classify_ghost_circuits() for circuit_type, circuits in classified.items(): print(f" {circuit_type}: {len(circuits)} circuits") # Measure residue strength strength = tracker.measure_residue_strength() print(f"Residue strength: {strength:.3f}") # Amplify ghosts amplified = tracker.amplify_ghosts(factor=2.0) print(f"Amplified {len(amplified)} ghost circuits")