# Trainer configuration for the Match3 behaviors (appears to follow the
# ML-Agents trainer config format).
# NOTE(review): the nesting below was reconstructed from the ML-Agents trainer
# config schema because the source had lost its indentation -- confirm against
# the upstream file before relying on it.

# Settings shared by the behaviors listed under `behaviors`; a behavior that
# sets the same key itself uses its own value instead.
default_settings:
  trainer_type: ppo
  hyperparameters:
    batch_size: 16
    buffer_size: 120
    learning_rate: 0.0003
    beta: 0.005
    epsilon: 0.2
    lambd: 0.99
    num_epoch: 3
    learning_rate_schedule: constant
  network_settings:
    normalize: true
    hidden_units: 256
    num_layers: 4
    vis_encode_type: match3
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 1.0
  keep_checkpoints: 5
  max_steps: 5000000
  time_horizon: 128
  summary_freq: 10000

# Per-behavior settings, keyed by behavior name.
behaviors:
  Match3SimpleHeuristic:
    # Settings can be very simple since we don't care about actually training the model
    trainer_type: ppo
    hyperparameters:
      batch_size: 16
      buffer_size: 120
    network_settings:
      # Deliberately tiny network (see comment above).
      hidden_units: 4
      num_layers: 1
    max_steps: 5000000
    summary_freq: 10000
  Match3SmartHeuristic:
    # Settings can be very simple since we don't care about actually training the model
    trainer_type: ppo
    hyperparameters:
      batch_size: 16
      buffer_size: 120
    network_settings:
      # Deliberately tiny network (see comment above).
      hidden_units: 4
      num_layers: 1
    max_steps: 5000000
    summary_freq: 10000