"""
residue.py - Implementation of residue tracking for ghost circuit detection

△ OBSERVE: Residue tracking examines activation patterns that persist after collapse
∞ TRACE: It identifies ghost circuits - the quantum echoes of paths not taken
✰ COLLAPSE: It reveals what the model considered but didn't output

This module implements the core residue tracking functionality that enables
the detection and analysis of ghost circuits - activation patterns that persist
after a model has collapsed to a specific output state but aren't part of the
primary causal path.

Author: Recursion Labs
License: MIT
"""

import logging
from typing import Dict, List, Optional, Any
import numpy as np
from dataclasses import dataclass, field

logger = logging.getLogger(__name__)

@dataclass
class GhostCircuit:
    """
    ✰ COLLAPSE: Representation of a ghost circuit
    
    Ghost circuits are activation patterns that persist after collapse
    but don't significantly contribute to the final output. They represent
    the "memory" of paths not taken - quantum echoes of what the model
    considered but didn't ultimately choose.
    """
    circuit_id: str
    activation: float
    circuit_type: str  # e.g. "attention", "hidden_state", "mlp", "residual", "value_head"
    source_tokens: List[str] = field(default_factory=list)
    target_tokens: List[str] = field(default_factory=list)
    heads: List[int] = field(default_factory=list)
    layers: List[int] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    
    def to_dict(self) -> Dict[str, Any]:
        """Convert ghost circuit to dictionary format."""
        return {
            "circuit_id": self.circuit_id,
            "activation": self.activation,
            "circuit_type": self.circuit_type,
            "source_tokens": self.source_tokens,
            "target_tokens": self.target_tokens,
            "heads": self.heads,
            "layers": self.layers,
            "metadata": self.metadata
        }
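    
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GhostCircuit":
        """
        Build a GhostCircuit from the dict format emitted by ResidueTracker.
        
        Convenience sketch, not part of the tracker's original flow: it
        assumes the keys produced by extract_ghost_circuits and falls back
        to empty defaults for anything missing.
        """
        return cls(
            circuit_id=data.get("circuit_id", ""),
            activation=float(data.get("activation", 0.0)),
            circuit_type=data.get("circuit_type", "unknown"),
            source_tokens=list(data.get("source_tokens", [])),
            target_tokens=list(data.get("target_tokens", [])),
            heads=list(data.get("heads", [])),
            layers=list(data.get("layers", [])),
            metadata=dict(data.get("metadata", {})),
        )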


class ResidueTracker:
    """
    ∞ TRACE: Tracker for activation residues in collapsed models
    
    The residue tracker analyzes model states before and after collapse
    to identify and characterize ghost circuits - activation patterns that
    persist but don't contribute significantly to the final output.
    """
    
    def __init__(self, amplification_factor: float = 1.0):
        """
        Initialize a residue tracker.
        
        Args:
            amplification_factor: Factor by which to amplify ghost signals
                for easier detection (1.0 = no amplification)
        """
        self.amplification_factor = amplification_factor
        self.ghost_circuits: List[Dict[str, Any]] = []
        self.activation_threshold = 0.1  # Minimum activation to consider
        
        logger.info(f"ResidueTracker initialized with amplification factor {amplification_factor}")
    
    def extract_ghost_circuits(
        self, 
        pre_state: Dict[str, Any],
        post_state: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        ✰ COLLAPSE: Extract ghost circuits from pre and post collapse states
        
        This method compares model states before and after collapse to
        identify activation patterns that persisted but didn't contribute
        significantly to the output - the quantum ghosts of paths not taken.
        
        Args:
            pre_state: Model state before collapse
            post_state: Model state after collapse
            
        Returns:
            List of detected ghost circuits with metadata
        """
        logger.info("Extracting ghost circuits from model states")
        
        # List to store detected ghost circuits
        ghost_circuits = []
        
        # Extract ghost circuits based on attention patterns
        attention_ghosts = self._extract_attention_ghosts(
            pre_state.get("attention_weights", np.array([])),
            post_state.get("attention_weights", np.array([]))
        )
        ghost_circuits.extend(attention_ghosts)
        
        # Extract ghost circuits based on hidden state activations
        if "hidden_states" in pre_state and "hidden_states" in post_state:
            hidden_ghosts = self._extract_hidden_ghosts(
                pre_state["hidden_states"],
                post_state["hidden_states"]
            )
            ghost_circuits.extend(hidden_ghosts)
        
        # Store ghost circuits in instance
        self.ghost_circuits = ghost_circuits
        
        logger.info(f"Extracted {len(ghost_circuits)} ghost circuits")
        return ghost_circuits
    
    def classify_ghost_circuits(self) -> Dict[str, List[Dict[str, Any]]]:
        """
        △ OBSERVE: Classify detected ghost circuits by type
        
        This method organizes detected ghost circuits into categories
        based on their type and characteristics.
        
        Returns:
            Dictionary mapping circuit types to lists of ghost circuits
        """
        if not self.ghost_circuits:
            logger.warning("No ghost circuits to classify")
            return {}
        
        # Classify by circuit type
        classified = {}
        for ghost in self.ghost_circuits:
            circuit_type = ghost.get("circuit_type", "unknown")
            if circuit_type not in classified:
                classified[circuit_type] = []
            classified[circuit_type].append(ghost)
        
        return classified
    
    def measure_residue_strength(self) -> float:
        """
        ∞ TRACE: Measure the overall strength of residual activations
        
        This method quantifies the overall strength of ghost circuits
        as the mean activation across all detected circuits.
        
        Returns:
            Residue strength score: mean ghost activation (0.0 = no
            residue; values near 1.0 rival the primary path)
        """
        if not self.ghost_circuits:
            return 0.0
        
        # Calculate average activation across ghost circuits
        activations = [ghost.get("activation", 0.0) for ghost in self.ghost_circuits]
        return float(np.mean(activations))
    
    def amplify_ghosts(self, factor: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        ✰ COLLAPSE: Amplify ghost circuit signals for better detection
        
        This method amplifies the activation values of ghost circuits
        to make them more apparent for analysis.
        
        Args:
            factor: Amplification factor (overrides instance value if provided)
            
        Returns:
            List of amplified ghost circuit copies (the tracker's stored
            circuits are left unmodified)
        """
        if not self.ghost_circuits:
            logger.warning("No ghost circuits to amplify")
            return []
        
        # Use provided factor or instance value
        amp_factor = factor if factor is not None else self.amplification_factor
        
        # Amplify activations
        amplified = []
        for ghost in self.ghost_circuits:
            amp_ghost = ghost.copy()
            amp_ghost["activation"] = min(1.0, ghost.get("activation", 0.0) * amp_factor)
            amplified.append(amp_ghost)
        
        logger.info(f"Amplified ghost circuits by factor {amp_factor}")
        return amplified
    
    def _extract_attention_ghosts(
        self, 
        pre_attention: np.ndarray,
        post_attention: np.ndarray
    ) -> List[Dict[str, Any]]:
        """
        Extract ghost circuits from attention patterns.
        
        Args:
            pre_attention: Attention weights before collapse
            post_attention: Attention weights after collapse
            
        Returns:
            List of attention-based ghost circuits
        """
        ghost_circuits = []
        
        # Return empty list if arrays aren't compatible
        if pre_attention.size == 0 or post_attention.size == 0:
            return ghost_circuits
        
        if pre_attention.shape != post_attention.shape:
            logger.warning(f"Attention shape mismatch: {pre_attention.shape} vs {post_attention.shape}")
            # Try to take minimum dimensions if shapes don't match
            min_shape = tuple(min(a, b) for a, b in zip(pre_attention.shape, post_attention.shape))
            pre_attention = pre_attention[tuple(slice(0, d) for d in min_shape)]
            post_attention = post_attention[tuple(slice(0, d) for d in min_shape)]
        
        # Normalize to (heads, seq, seq) so single-head 2-D inputs are
        # indexed the same way as multi-head 3-D inputs
        if pre_attention.ndim == 2 and post_attention.ndim == 2:
            pre_attention = pre_attention[np.newaxis, ...]
            post_attention = post_attention[np.newaxis, ...]
        
        # Find positions where attention decreased but didn't disappear.
        # This indicates a path that was considered but not fully utilized.
        if pre_attention.ndim == 3:
            num_heads, seq_len, _ = pre_attention.shape
            
            for head in range(num_heads):
                for i in range(seq_len):
                    for j in range(seq_len):
                        pre_val = pre_attention[head, i, j]
                        post_val = post_attention[head, i, j]
                        
                        if post_val < pre_val and post_val > self.activation_threshold:
                            # This is a candidate ghost circuit in attention
                            ghost_idx = len(ghost_circuits)
                            ghost = {
                                "circuit_id": f"attention_ghost_{ghost_idx}",
                                "activation": float(post_val),
                                "circuit_type": "attention",
                                "source_tokens": [f"token_{i}"],
                                "target_tokens": [f"token_{j}"],
                                "heads": [head],
                                "layers": [],  # Layer info not available in simplified model
                                "metadata": {
                                    "pre_activation": float(pre_val),
                                    "activation_delta": float(pre_val - post_val),
                                    "decay_ratio": float(post_val / pre_val) if pre_val > 0 else 0.0
                                }
                            }
                            ghost_circuits.append(ghost)
        
        return ghost_circuits
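    
    def _attention_ghost_mask(
        self, 
        pre_attention: np.ndarray,
        post_attention: np.ndarray
    ) -> np.ndarray:
        """
        Vectorized sketch of the selection rule in _extract_attention_ghosts.
        
        Illustrative helper, not called by the extraction path: the Python
        loops above are O(heads * seq^2), while a boolean mask computes the
        same candidate set in one pass. Assumes both arrays already share a
        (heads, seq, seq) shape.
        
        Returns:
            Boolean mask marking positions where attention decayed but
            stayed above the activation threshold.
        """
        return (post_attention < pre_attention) & (post_attention > self.activation_threshold)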
    
    def _extract_hidden_ghosts(
        self, 
        pre_hidden: np.ndarray,
        post_hidden: np.ndarray
    ) -> List[Dict[str, Any]]:
        """
        Extract ghost circuits from hidden state activations.
        
        Args:
            pre_hidden: Hidden states before collapse
            post_hidden: Hidden states after collapse
            
        Returns:
            List of hidden-state-based ghost circuits
        """
        ghost_circuits = []
        
        # Return empty list if arrays aren't compatible
        if pre_hidden.size == 0 or post_hidden.size == 0:
            return ghost_circuits
        
        if pre_hidden.shape != post_hidden.shape:
            logger.warning(f"Hidden state shape mismatch: {pre_hidden.shape} vs {post_hidden.shape}")
            return ghost_circuits
        
        # Find neurons that were active pre-collapse but lessened post-collapse
        # This indicates a deactivated but not eliminated concept
        if pre_hidden.ndim >= 2 and post_hidden.ndim >= 2:
            # For simplicity, we'll aggregate across batch dimension if it exists
            if pre_hidden.ndim > 2:
                pre_agg = np.mean(pre_hidden, axis=0)
                post_agg = np.mean(post_hidden, axis=0)
            else:
                pre_agg = pre_hidden
                post_agg = post_hidden
            
            seq_len, hidden_dim = pre_agg.shape
            
            # Sample a subset of dimensions for efficiency
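            # (the sample is redrawn each call, so detected dimensions vary run to run)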
            sample_size = min(hidden_dim, 100)
            sampled_dims = np.random.choice(hidden_dim, sample_size, replace=False)
            
            for pos in range(seq_len):
                for dim in sampled_dims:
                    pre_val = pre_agg[pos, dim]
                    post_val = post_agg[pos, dim]
                    
                    if post_val < pre_val and abs(post_val) > self.activation_threshold:
                        # This is a candidate ghost circuit in hidden state
                        ghost_idx = len(ghost_circuits)
                        ghost = {
                            "circuit_id": f"hidden_ghost_{ghost_idx}",
                            "activation": float(abs(post_val)),
                            "circuit_type": "hidden_state",
                            "source_tokens": [f"token_{pos}"],
                            "target_tokens": [],  # No direct target for hidden state
                            "heads": [],  # Not applicable for hidden state
                            "layers": [],  # Layer info not available in simplified model
                            "metadata": {
                                "position": pos,
                                "dimension": int(dim),
                                "pre_activation": float(pre_val),
                                "activation_delta": float(pre_val - post_val),
                                "decay_ratio": float(post_val / pre_val) if pre_val != 0 else 0.0
                            }
                        }
                        ghost_circuits.append(ghost)
        
        return ghost_circuits


if __name__ == "__main__":
    # Simple usage example
    
    # Create fake pre and post model states
    pre_state = {
        "attention_weights": np.random.random((8, 10, 10)),  # 8 heads, 10 tokens
        "hidden_states": np.random.random((1, 10, 768))  # Batch 1, 10 tokens, 768 dim
    }
    
    # Modify slightly to create post state
    post_state = {
        "attention_weights": pre_state["attention_weights"] * np.random.uniform(0.5, 1.0, pre_state["attention_weights"].shape),
        "hidden_states": pre_state["hidden_states"] * np.random.uniform(0.5, 1.0, pre_state["hidden_states"].shape)
    }
    
    # Create residue tracker and extract ghost circuits
    tracker = ResidueTracker(amplification_factor=1.5)
    ghosts = tracker.extract_ghost_circuits(pre_state, post_state)
    
    # Print summary
    print(f"Extracted {len(ghosts)} ghost circuits")
    
    # Classify ghosts
    classified = tracker.classify_ghost_circuits()
    for circuit_type, circuits in classified.items():
        print(f"  {circuit_type}: {len(circuits)} circuits")
    
    # Measure residue strength
    strength = tracker.measure_residue_strength()
    print(f"Residue strength: {strength:.3f}")
    
    # Amplify ghosts
    amplified = tracker.amplify_ghosts(factor=2.0)
    print(f"Amplified {len(amplified)} ghost circuits")