{
  "algorithm": "PPO",
  "environment": "ML-Agents-Pyramids",
  "hyperparameters": {
    "learning_rate": 0.0003,
    "gamma": 0.99,
    "gae_lambda": 0.95,
    "clip_coef": 0.2,
    "entropy_coef": 0.01,
    "value_coef": 0.5,
    "curiosity_coef": 0.1
  },
  "network_architecture": {
    "hidden_size": 512,
    "num_layers": 3,
    "activation": "ReLU",
    "curiosity_network": "RND"
  },
  "training": {
    "total_episodes": 3000,
    "batch_size": 1024,
    "update_epochs": 4
  }
}