import os
import sys

import numpy as np
import torch

# Add project root to path so the `ai` package can be imported when this file
# is executed directly as a script.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from ai.models.training_config import POLICY_SIZE  # noqa: E402
from ai.training.train import AlphaNet  # noqa: E402


def debug_model(model_path, data_path, num_samples=5, top_k=5):
    """Print model predictions next to ground-truth policies for a few samples.

    Loads a checkpoint into an ``AlphaNet``, reads the ``"states"`` and
    ``"policies"`` arrays from ``data_path``, and for each of the first
    ``num_samples`` entries prints the value prediction, the ``top_k`` most
    probable actions, and the arg-max action of the stored policy.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load``. Either a
            raw state dict or a dict holding the state dict under the
            ``"model_state"`` key.
        data_path: Path to an archive readable by ``np.load`` that exposes
            ``"states"`` and ``"policies"`` entries.
        num_samples: How many leading samples to inspect (default 5).
        top_k: How many of the highest-probability actions to print per
            sample (default 5). Clamped to the width of the policy output.

    Returns:
        None. All output goes to stdout.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): torch.load unpickles the file; only use checkpoints from a
    # trusted source (or pass weights_only=True where the installed torch
    # version and checkpoint contents allow it).
    checkpoint = torch.load(model_path, map_location=device)
    if isinstance(checkpoint, dict) and "model_state" in checkpoint:
        state_dict = checkpoint["model_state"]
    else:
        state_dict = checkpoint

    model = AlphaNet(policy_size=POLICY_SIZE).to(device)
    model.load_state_dict(state_dict)
    model.eval()

    print(f"Loading data from {data_path}...")
    # The context manager closes the underlying archive handle once the
    # needed arrays have been read into memory.
    with np.load(data_path) as data:
        states = data["states"][:num_samples]
        true_policies = data["policies"][:num_samples]

    for i, (state_np, true_policy) in enumerate(zip(states, true_policies)):
        # unsqueeze(0) adds a leading batch dimension of size 1.
        state = torch.FloatTensor(state_np).unsqueeze(0).to(device)

        with torch.no_grad():
            # presumably the model returns (policy_logits, value) — confirm
            # against AlphaNet.forward.
            p_logits, v = model(state)
            p_probs = torch.softmax(p_logits, dim=1)

        print(f"\nSample {i}:")
        print(f"Value prediction: {v.item():.4f}")

        # Check Top-k predicted actions; clamp k so topk cannot be asked for
        # more entries than the policy head produces.
        k = min(top_k, p_probs.shape[1])
        top_probs, top_actions = torch.topk(p_probs, k)
        print(f"Top {k} Predictions:")
        for action, prob in zip(top_actions[0].tolist(), top_probs[0].tolist()):
            print(f" Action {action}: {prob:.1%}")

        # Check ground truth Top-1
        gt_action = np.argmax(true_policy)
        gt_prob = true_policy[gt_action]
        print(f"Ground Truth Action {gt_action} with weight {gt_prob:.1%}")


if __name__ == "__main__":
    debug_model("ai/models/alphanet_best.pt", "ai/data/data_batch_0.npz")