{
  "algorithm": "PPO",
  "environment": "ML-Agents-Pyramids",
  "hyperparameters": {
    "learning_rate": 0.0003,
    "gamma": 0.99,
    "gae_lambda": 0.95,
    "clip_coef": 0.2,
    "entropy_coef": 0.01,
    "value_coef": 0.5,
    "curiosity_coef": 0.1
  },
  "network_architecture": {
    "hidden_size": 512,
    "num_layers": 3,
    "activation": "ReLU",
    "curiosity_network": "RND"
  },
  "training": {
    "total_episodes": 3000,
    "batch_size": 1024,
    "update_epochs": 4
  }
}