File size: 3,295 Bytes
5302f78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
## Developer: inkbytefo
## Modified: 2025-11-22

import torch
import torch.nn.functional as F
from src.models.agiformer import AGIFORMER
import os
import numpy as np

def inspect_system_2(model_path):
    """Diagnose whether the AGIFORMER 'System 2' reasoning block is active.

    Loads a trained checkpoint, runs a short dummy byte-level input through
    the model while a forward hook measures how much the reasoning block
    changes the latent state, then prints diagnostics on the latent shift,
    the gate biases, and the reasoning-MLP weight spread.

    Args:
        model_path: Path to a saved state_dict checkpoint (e.g. 'best_model.pth').

    Raises:
        FileNotFoundError: If model_path does not exist.
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Checkpoint not found: {model_path}")

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Config (must match the training configuration)
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4
    THINKING_STEPS = 3

    print(f"Inspecting {model_path} on {DEVICE}...")

    model = AGIFORMER(
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        patch_size=PATCH_SIZE,
        thinking_steps=THINKING_STEPS
    ).to(DEVICE)

    # weights_only=True: a state_dict contains only tensors, so restrict
    # unpickling — plain torch.load executes arbitrary pickled code.
    state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    # Hook mechanism: record how far the reasoning block moves the latents.
    # (Internal loop variables of the block's forward can't be hooked without
    # modifying the class, so only the input/output delta is measured.)
    stats = {"z_diff": []}

    def hook_fn(module, input, output):
        # input is the positional-args tuple (x,); output is the refined x.
        z_in = input[0]
        z_out = output
        # Mean L2 distance per token between incoming and refined latents.
        diff = torch.norm(z_out - z_in, dim=-1).mean().item()
        stats["z_diff"].append(diff)

    # Register the hook on the reasoning block; ensure it is always removed,
    # even if the forward pass raises (otherwise the hook leaks).
    handle = model.reasoning.register_forward_hook(hook_fn)
    try:
        # Dummy input (enwik8-style byte-level text)
        dummy_text = "The history of artificial intelligence"
        input_bytes = [ord(c) for c in dummy_text]
        # Pad with spaces (byte 32) to a multiple of the patch size.
        pad = (PATCH_SIZE - len(input_bytes) % PATCH_SIZE) % PATCH_SIZE
        input_bytes.extend([32] * pad)

        x = torch.tensor(input_bytes, dtype=torch.long).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            # The forward pass triggers the hook.
            _ = model(x)
    finally:
        handle.remove()

    # Guard: np.mean([]) would emit a warning and yield NaN if the hook
    # never fired (e.g. the reasoning block was skipped entirely).
    avg_diff = float(np.mean(stats["z_diff"])) if stats["z_diff"] else 0.0

    # Negative gate biases suggest the gate is closed by default, i.e. the
    # model prefers to keep the initial thought rather than refine it.
    gate_bias_mean = model.reasoning.gate.bias.mean().item()

    print("\n--- SYSTEM 2 DIAGNOSTICS ---")
    print(f"1. Latent Refinement (Thinking Magnitude):")
    print(f"   Average Euclidean Distance (z_out - z_in): {avg_diff:.4f}")
    print(f"   (If close to 0.0, the model is SKIPPING the thinking step.)")

    print(f"\n2. Gate Bias Statistics:")
    print(f"   Mean Bias: {gate_bias_mean:.4f}")
    print(f"   (Negative values suggest the model prefers to keep the initial thought.)")

    print(f"\n3. Parameter Health:")
    # Std of the first reasoning-MLP layer's weights (a collapse indicator).
    mlp_weight_std = model.reasoning.think_mlp[0].weight.std().item()
    print(f"   MLP Weight Std: {mlp_weight_std:.4f}")

    # Interpretation thresholds: < 0.01 ~ dormant, > 10.0 ~ exploding.
    if avg_diff < 0.01:
        print("\n[RESULT] SYSTEM 2 IS DORMANT (Collapsed).")
        print("Reason: The model learned that 'not thinking' is safer for loss.")
    elif avg_diff > 10.0:
        print("\n[RESULT] SYSTEM 2 IS UNSTABLE (Exploding).")
    else:
        print("\n[RESULT] SYSTEM 2 IS ACTIVE.")
        print("The model is actively modifying its latent state.")

# Script entry point: inspect the best checkpoint produced by training.
if __name__ == "__main__":
    inspect_system_2("best_model.pth")