|
|
|
|
|
|
|
|
|
|
|
import torch |
|
|
import torch.nn.functional as F |
|
|
from src.models.agiformer import AGIFORMER |
|
|
import os |
|
|
import numpy as np |
|
|
|
|
|
def inspect_system_2(model_path):
    """Load a trained AGIFORMER checkpoint and diagnose its System-2 module.

    Registers a forward hook on ``model.reasoning`` to measure how much the
    latent state changes during the "thinking" step, runs a single dummy
    forward pass, prints gate/parameter statistics, and emits a verdict
    (DORMANT / UNSTABLE / ACTIVE) based on the average latent displacement.

    Args:
        model_path: Path to a ``state_dict`` checkpoint (``.pth``) saved from
            an AGIFORMER trained with the hyperparameters below.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Architecture hyperparameters -- these must match the training run,
    # otherwise load_state_dict() below fails on shape mismatches.
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4
    THINKING_STEPS = 3

    print(f"Inspecting {model_path} on {DEVICE}...")

    model = AGIFORMER(
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        patch_size=PATCH_SIZE,
        thinking_steps=THINKING_STEPS
    ).to(DEVICE)

    # weights_only=True: the checkpoint is a plain tensor state_dict, so we
    # avoid unpickling arbitrary (potentially malicious) objects from disk.
    state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    # Diagnostics collected by the hook during the forward pass.
    stats = {"z_diff": []}

    def hook_fn(module, input, output):
        # Mean L2 distance between the reasoning module's input and output
        # latents: how far one "thinking" step actually moved the state.
        z_in = input[0]
        z_out = output
        diff = torch.norm(z_out - z_in, dim=-1).mean().item()
        stats["z_diff"].append(diff)

    handle = model.reasoning.register_forward_hook(hook_fn)
    try:
        # Byte-level dummy input, right-padded with spaces (byte 32) so the
        # sequence length is a multiple of the patch size. Using PATCH_SIZE
        # here (rather than a literal 4) keeps padding in sync with the model.
        dummy_text = "The history of artificial intelligence"
        input_bytes = [ord(c) for c in dummy_text]
        pad = (PATCH_SIZE - len(input_bytes) % PATCH_SIZE) % PATCH_SIZE
        input_bytes.extend([32] * pad)

        x = torch.tensor(input_bytes, dtype=torch.long).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            _ = model(x)  # output discarded; we only need the hook's stats
    finally:
        # Always detach the hook, even if the forward pass raises.
        handle.remove()

    gate_bias_mean = model.reasoning.gate.bias.mean().item()
    # Computed once and reused so the printed value and the verdict agree.
    avg_diff = np.mean(stats['z_diff'])

    print("\n--- SYSTEM 2 DIAGNOSTICS ---")
    print("1. Latent Refinement (Thinking Magnitude):")
    print(f"   Average Euclidean Distance (z_out - z_in): {avg_diff:.4f}")
    print("   (If close to 0.0, the model is SKIPPING the thinking step.)")

    print("\n2. Gate Bias Statistics:")
    print(f"   Mean Bias: {gate_bias_mean:.4f}")
    print("   (Negative values suggest the model prefers to keep the initial thought.)")

    print("\n3. Parameter Health:")
    mlp_weight_std = model.reasoning.think_mlp[0].weight.std().item()
    print(f"   MLP Weight Std: {mlp_weight_std:.4f}")

    # Verdict thresholds: ~0 displacement means the gate suppresses thinking
    # entirely; very large displacement suggests divergence.
    if avg_diff < 0.01:
        print("\n[RESULT] SYSTEM 2 IS DORMANT (Collapsed).")
        print("Reason: The model learned that 'not thinking' is safer for loss.")
    elif avg_diff > 10.0:
        print("\n[RESULT] SYSTEM 2 IS UNSTABLE (Exploding).")
    else:
        print("\n[RESULT] SYSTEM 2 IS ACTIVE.")
        print("The model is actively modifying its latent state.")
|
|
|
|
|
if __name__ == "__main__": |
|
|
inspect_system_2("best_model.pth") |
|
|
|