# LovecaSim / ai / utils / debug_agent.py
# trioskosmos's picture
# Upload ai/utils/debug_agent.py with huggingface_hub
# 0f9c2a1 verified
import os
import sys
import numpy as np
import torch
# Add project root to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from ai.models.training_config import POLICY_SIZE
from ai.training.train import AlphaNet
def debug_model(
    model_path: str,
    data_path: str,
    *,
    num_samples: int = 5,
    top_k: int = 5,
) -> None:
    """Print a checkpoint's predictions next to the recorded policy targets.

    Loads an ``AlphaNet`` checkpoint, runs it on the first ``num_samples``
    entries of an ``.npz`` batch, and for each entry prints the value
    prediction, the ``top_k`` most probable actions, and the arg-max action
    of the stored target policy together with its weight.

    Args:
        model_path: Checkpoint readable by ``torch.load``. Either a bare
            state dict, or a dict holding the state dict under
            ``"model_state"``.
        data_path: ``.npz`` archive containing ``"states"`` and
            ``"policies"`` arrays, indexed by sample along the first axis.
        num_samples: How many leading samples to inspect (default 5, the
            previously hard-coded value).
        top_k: How many of the highest-probability actions to print per
            sample (default 5). Clamped to the size of the policy output.

    Raises:
        ValueError: If ``num_samples`` is negative, ``top_k`` is below 1, or
            the archive holds a different number of states and policies in
            the inspected range.
    """
    if num_samples < 0:
        raise ValueError(f"num_samples must be >= 0, got {num_samples}")
    if top_k < 1:
        raise ValueError(f"top_k must be >= 1, got {top_k}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): torch.load unpickles the file, so only point this at
    # checkpoints from a trusted source.
    checkpoint = torch.load(model_path, map_location=device)
    if isinstance(checkpoint, dict) and "model_state" in checkpoint:
        state_dict = checkpoint["model_state"]
    else:
        # The file is assumed to be the state dict itself.
        state_dict = checkpoint

    model = AlphaNet(policy_size=POLICY_SIZE).to(device)
    model.load_state_dict(state_dict)
    model.eval()

    print(f"Loading data from {data_path}...")
    # NpzFile keeps the archive open until closed; the context manager
    # releases the handle once the needed arrays have been read into memory.
    with np.load(data_path) as data:
        states = data["states"][:num_samples]
        true_policies = data["policies"][:num_samples]

    # Fail up front instead of silently truncating or crashing mid-report.
    if len(states) != len(true_policies):
        raise ValueError(
            f"states/policies length mismatch: {len(states)} vs {len(true_policies)}"
        )

    with torch.no_grad():
        for i, (state_arr, target_policy) in enumerate(zip(states, true_policies)):
            # The model is fed one sample at a time with a leading batch axis.
            state = torch.as_tensor(state_arr, dtype=torch.float32).unsqueeze(0).to(device)
            p_logits, v = model(state)
            p_probs = torch.softmax(p_logits, dim=1)

            print(f"\nSample {i}:")
            print(f"Value prediction: {v.item():.4f}")

            # Check Top-k predicted actions; never ask topk for more entries
            # than the policy output provides.
            k = min(top_k, p_probs.shape[1])
            top_probs, top_actions = torch.topk(p_probs, k)
            print(f"Top {k} Predictions:")
            for action, prob in zip(top_actions[0].tolist(), top_probs[0].tolist()):
                print(f" Action {action}: {prob:.1%}")

            # Check ground truth Top-1
            gt_action = np.argmax(target_policy)
            gt_prob = target_policy[gt_action]
            print(f"Ground Truth Action {gt_action} with weight {gt_prob:.1%}")
if __name__ == "__main__":
    # Script entry point: inspect the default checkpoint against the first
    # data batch. Both paths are relative, so they resolve against the
    # current working directory.
    default_checkpoint = "ai/models/alphanet_best.pt"
    default_batch = "ai/data/data_batch_0.npz"
    debug_model(default_checkpoint, default_batch)