# Training configuration for the `Sorter` behavior, plus a curriculum that
# steps the `num_tiles` environment parameter through ten lessons.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. Placement of keep_checkpoints / max_steps / time_horizon /
# summary_freq at the behavior level follows the ML-Agents config schema --
# confirm against the trainer version in use.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: False
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 256
    summary_freq: 10000

environment_parameters:
  num_tiles:
    # Ten lessons (Lesson0..Lesson9). `value` rises from 2.0 to 20.0 in steps
    # of 2.0 while the completion threshold rises from 0.3 to 0.75.
    # NOTE(review): with `measure: progress` the threshold is presumably a
    # fraction of max_steps -- verify against the ML-Agents curriculum docs.
    curriculum:
      - name: Lesson0 # The leading '-' is important: each lesson is a list item
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 2.0
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 4.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value: 8.0
      - name: Lesson4
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.55
        value: 10.0
      - name: Lesson5
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.6
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.65
        value: 14.0
      - name: Lesson7
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.7
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.75
        value: 18.0
      # The last lesson defines only a value; it has no completion_criteria.
      - name: Lesson9
        value: 20.0