{
  "algorithm": "PPO",
  "environment": "ML-Agents-Pyramids",
  "hyperparameters": {
    "learning_rate": 0.0003,
    "gamma": 0.99,
    "gae_lambda": 0.95,
    "clip_coef": 0.2,
    "entropy_coef": 0.01,
    "value_coef": 0.5,
    "curiosity_coef": 0.1
  },
  "network_architecture": {
    "hidden_size": 512,
    "num_layers": 3,
    "activation": "ReLU",
    "curiosity_network": "RND"
  },
  "training": {
    "total_episodes": 3000,
    "batch_size": 1024,
    "update_epochs": 4
  }
}