# agiformer / inspect_reasoning.py
# Uploaded by tefoteknik — "Update AGIFORMER with Turkish benchmark" (commit 5302f78, verified)
## Developer: inkbytefo
## Modified: 2025-11-22
import torch
import torch.nn.functional as F
from src.models.agiformer import AGIFORMER
import os
import numpy as np
def inspect_system_2(model_path):
    """Load a trained AGIFORMER checkpoint and diagnose its System-2 reasoning block.

    Attaches a forward hook to ``model.reasoning`` to measure how far the
    reasoning block moves the latent state during the "thinking" refinement,
    then prints a diagnostic report classifying the block as dormant,
    unstable, or active.

    Args:
        model_path: Path to a ``state_dict`` checkpoint saved by the trainer.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Config (must match the values used during training)
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4
    THINKING_STEPS = 3
    print(f"Inspecting {model_path} on {DEVICE}...")
    model = AGIFORMER(
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        patch_size=PATCH_SIZE,
        thinking_steps=THINKING_STEPS
    ).to(DEVICE)
    # weights_only=True: the checkpoint is a plain state_dict, so refuse to
    # unpickle arbitrary objects from a potentially untrusted file.
    state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    # Hook mechanism: capture how far the reasoning block moves the latent
    # state. We can't hook internal variables of the forward loop (gates,
    # per-step updates) without modifying the class, so we only measure the
    # input-vs-output displacement.
    stats = {"z_diff": []}

    def hook_fn(module, input, output):
        # input is the tuple (x,); output is the refined x.
        z_in = input[0]
        z_out = output
        # Mean per-token L2 distance between incoming and refined latents.
        diff = torch.norm(z_out - z_in, dim=-1).mean().item()
        stats["z_diff"].append(diff)

    # Register hook on the reasoning block; guarantee removal even if the
    # forward pass raises (the original only removed it on the success path).
    handle = model.reasoning.register_forward_hook(hook_fn)
    try:
        # Dummy input (byte-level, matching the enwik8 training setup).
        dummy_text = "The history of artificial intelligence"
        input_bytes = [ord(c) for c in dummy_text]
        # Pad with spaces (byte 32) to a multiple of the patch size.
        # Tied to PATCH_SIZE (was a hard-coded 4) so config changes stay in sync.
        pad = (PATCH_SIZE - len(input_bytes) % PATCH_SIZE) % PATCH_SIZE
        input_bytes.extend([32] * pad)
        x = torch.tensor(input_bytes, dtype=torch.long).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            # Running the forward pass triggers the hook.
            _ = model(x)
    finally:
        handle.remove()

    # Manual inspection of internal reasoning weights.
    # A negative gate bias would mean the gate is closed by default.
    gate_bias_mean = model.reasoning.gate.bias.mean().item()

    # Guard against the hook never having fired (np.mean([]) would be nan).
    avg_diff = float(np.mean(stats["z_diff"])) if stats["z_diff"] else 0.0

    print("\n--- SYSTEM 2 DIAGNOSTICS ---")
    print(f"1. Latent Refinement (Thinking Magnitude):")
    print(f" Average Euclidean Distance (z_out - z_in): {avg_diff:.4f}")
    print(f" (If close to 0.0, the model is SKIPPING the thinking step.)")
    print(f"\n2. Gate Bias Statistics:")
    print(f" Mean Bias: {gate_bias_mean:.4f}")
    print(f" (Negative values suggest the model prefers to keep the initial thought.)")
    print(f"\n3. Parameter Health:")
    mlp_weight_grad = model.reasoning.think_mlp[0].weight.std().item()
    print(f" MLP Weight Std: {mlp_weight_grad:.4f}")

    # Interpretation thresholds: < 0.01 -> collapsed, > 10.0 -> exploding.
    if avg_diff < 0.01:
        print("\n[RESULT] SYSTEM 2 IS DORMANT (Collapsed).")
        print("Reason: The model learned that 'not thinking' is safer for loss.")
    elif avg_diff > 10.0:
        print("\n[RESULT] SYSTEM 2 IS UNSTABLE (Exploding).")
    else:
        print("\n[RESULT] SYSTEM 2 IS ACTIVE.")
        print("The model is actively modifying its latent state.")
if __name__ == "__main__":
    # Default checkpoint path produced by the training script.
    checkpoint_path = "best_model.pth"
    inspect_system_2(checkpoint_path)