Spaces:

trioskosmos
/

LovecaSim

Running

LovecaSim / ai / utils / debug_agent.py

Upload ai/utils/debug_agent.py with huggingface_hub

0f9c2a1 verified 9 days ago

1.79 kB

	import os
	import sys

	import numpy as np
	import torch

	# Add project root to path
	sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

	from ai.models.training_config import POLICY_SIZE
	from ai.training.train import AlphaNet


	def debug_model(model_path, data_path, *, num_samples: int = 5, top_k: int = 5) -> None:
	    """Print a trained AlphaNet's predictions next to recorded policy targets.

	    Loads a checkpoint into a fresh ``AlphaNet``, feeds the first
	    ``num_samples`` states from an ``.npz`` archive through it one at a time,
	    and prints the value output, the ``top_k`` most probable actions and the
	    arg-max entry of the matching recorded policy.

	    Args:
	        model_path: Checkpoint handed to ``torch.load``. Either a bare state
	            dict or a dict holding the state dict under ``"model_state"``.
	        data_path: Archive handed to ``np.load``; must provide ``"states"``
	            and ``"policies"`` entries.
	        num_samples: How many leading samples to inspect.
	        top_k: How many of the highest-probability actions to print; clamped
	            to the width of the policy output so small policies do not crash.
	    """
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    # NOTE(review): torch.load may unpickle arbitrary objects depending on the
	    # installed PyTorch version/defaults -- only load checkpoints you trust.
	    checkpoint = torch.load(model_path, map_location=device)

	    # Accept both a wrapped checkpoint and a plain state dict.
	    if isinstance(checkpoint, dict) and "model_state" in checkpoint:
	        state_dict = checkpoint["model_state"]
	    else:
	        state_dict = checkpoint

	    model = AlphaNet(policy_size=POLICY_SIZE).to(device)
	    model.load_state_dict(state_dict)
	    model.eval()

	    print(f"Loading data from {data_path}...")
	    # Read both arrays inside the context manager so the archive's file
	    # handle is released as soon as the data has been pulled out.
	    with np.load(data_path) as data:
	        states = data["states"][:num_samples]
	        true_policies = data["policies"][:num_samples]

	    for i, state_array in enumerate(states):
	        # Add a leading batch dimension of 1 before handing it to the model.
	        state = torch.as_tensor(state_array, dtype=torch.float32).unsqueeze(0).to(device)
	        with torch.no_grad():
	            p_logits, v = model(state)
	            p_probs = torch.softmax(p_logits, dim=1)

	        print(f"\nSample {i}:")
	        # .item() requires a single-element tensor -- assumes the value head
	        # emits one scalar per sample; TODO confirm against AlphaNet.
	        print(f"Value prediction: {v.item():.4f}")

	        # Check the top-k predicted actions (k never exceeds the policy width).
	        k = min(top_k, p_probs.shape[1])
	        top_probs, top_actions = torch.topk(p_probs, k, dim=1)
	        print(f"Top {k} Predictions:")
	        for action, prob in zip(top_actions[0].tolist(), top_probs[0].tolist()):
	            print(f" Action {action}: {prob:.1%}")

	        # Check ground truth Top-1
	        target = true_policies[i]
	        gt_action = np.argmax(target)
	        gt_prob = target[gt_action]
	        print(f"Ground Truth Action {gt_action} with weight {gt_prob:.1%}")


	if __name__ == "__main__":
	    # Usage: debug_agent.py [model_path [data_path]]
	    # Both paths are relative to the current working directory; with no
	    # arguments the script falls back to the bundled defaults below.
	    cli_args = sys.argv[1:]
	    model_path = cli_args[0] if len(cli_args) > 0 else "ai/models/alphanet_best.pt"
	    data_path = cli_args[1] if len(cli_args) > 1 else "ai/data/data_batch_0.npz"
	    debug_model(model_path, data_path)