First Push
- 3DBall.yaml +25 -0
- 3DBallHard.yaml +25 -0
- 3DBall_randomize.yaml +36 -0
- Basic.yaml +25 -0
- Crawler.yaml +25 -0
- FoodCollector.yaml +25 -0
- GridWorld.yaml +25 -0
- Hallway.yaml +28 -0
- Match3.yaml +48 -0
- PushBlock.yaml +25 -0
- Pyramids.yaml +31 -0
- PyramidsRND.yaml +32 -0
- README.md +35 -0
- SnowballTarget.yaml +28 -0
- Sorter_curriculum.yaml +102 -0
- Visual3DBall.yaml +25 -0
- VisualFoodCollector.yaml +25 -0
- Walker.yaml +25 -0
- WallJump.yaml +49 -0
- WallJump_curriculum.yaml +118 -0
- Worm.yaml +25 -0
- config.json +1 -0
- configuration.yaml +33 -0
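
Every YAML file in this commit is a Unity ML-Agents trainer configuration (they match the stock configs shipped with the ml-agents repo, plus the Pyramids run files at the bottom). Any of them can be passed straight to the trainer CLI, exactly as the README below describes; a minimal sketch, assuming ML-Agents is installed and the matching environment is open in the Unity Editor or supplied via `--env` (the run-id is an arbitrary label):

```bash
# Train the 3DBall behavior using the config from this repo.
# Results are written under ./results/<run-id>/.
mlagents-learn ./3DBall.yaml --run-id=3DBall-first-run
```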
3DBall.yaml
ADDED
```yaml
behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000
```
3DBallHard.yaml
ADDED
```yaml
behaviors:
  3DBallHard:
    trainer_type: ppo
    hyperparameters:
      batch_size: 120
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000
```
3DBall_randomize.yaml
ADDED
```yaml
behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000
environment_parameters:
  mass:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.5
      max_value: 10
  scale:
    sampler_type: uniform
    sampler_parameters:
      min_value: 0.75
      max_value: 3
```
Basic.yaml
ADDED
```yaml
behaviors:
  Basic:
    trainer_type: ppo
    hyperparameters:
      batch_size: 32
      buffer_size: 256
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 20
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.9
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 3
    summary_freq: 2000
```
Crawler.yaml
ADDED
```yaml
behaviors:
  Crawler:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 1000
    summary_freq: 30000
```
FoodCollector.yaml
ADDED
```yaml
behaviors:
  GridFoodCollector:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 10000
```
GridWorld.yaml
ADDED
```yaml
behaviors:
  GridWorld:
    trainer_type: ppo
    hyperparameters:
      batch_size: 32
      buffer_size: 256
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.9
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 5
    summary_freq: 20000
```
Hallway.yaml
ADDED
```yaml
behaviors:
  Hallway:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 1024
      learning_rate: 0.0003
      beta: 0.03
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
      memory:
        sequence_length: 64
        memory_size: 128
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 64
    summary_freq: 10000
```
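
The memory block under network_settings is what sets Hallway apart from the configs above: in ML-Agents it enables a recurrent (LSTM) policy, with sequence_length controlling how many consecutive steps are fed through the network during training and memory_size the size of the recurrent hidden state.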
Match3.yaml
ADDED
```yaml
default_settings:
  trainer_type: ppo
  hyperparameters:
    batch_size: 16
    buffer_size: 120
    learning_rate: 0.0003
    beta: 0.005
    epsilon: 0.2
    lambd: 0.99
    num_epoch: 3
    learning_rate_schedule: constant
  network_settings:
    normalize: true
    hidden_units: 256
    num_layers: 4
    vis_encode_type: match3
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 1.0
  keep_checkpoints: 5
  max_steps: 5000000
  time_horizon: 128
  summary_freq: 10000

behaviors:
  Match3SimpleHeuristic:
    # Settings can be very simple since we don't care about actually training the model
    trainer_type: ppo
    hyperparameters:
      batch_size: 16
      buffer_size: 120
    network_settings:
      hidden_units: 4
      num_layers: 1
    max_steps: 5000000
    summary_freq: 10000
  Match3SmartHeuristic:
    # Settings can be very simple since we don't care about actually training the model
    trainer_type: ppo
    hyperparameters:
      batch_size: 16
      buffer_size: 120
    network_settings:
      hidden_units: 4
      num_layers: 1
    max_steps: 5000000
    summary_freq: 10000
```
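
Note the layout here: the top-level default_settings block supplies the full trainer configuration for every behavior, and the two heuristic baselines under behaviors override only the few fields they care about, since (as the comments say) they are not really being trained.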
PushBlock.yaml
ADDED
```yaml
behaviors:
  PushBlock:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 2000000
    time_horizon: 64
    summary_freq: 60000
```
Pyramids.yaml
ADDED
```yaml
behaviors:
  Pyramids:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      curiosity:
        gamma: 0.99
        strength: 0.02
        network_settings:
          hidden_units: 256
        learning_rate: 0.0003
    keep_checkpoints: 5
    max_steps: 10000000
    time_horizon: 128
    summary_freq: 30000
```
PyramidsRND.yaml
ADDED
```yaml
behaviors:
  Pyramids:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      rnd:
        gamma: 0.99
        strength: 0.01
        network_settings:
          hidden_units: 64
          num_layers: 3
        learning_rate: 0.0001
    keep_checkpoints: 5
    max_steps: 1000000
    time_horizon: 128
    summary_freq: 30000
```
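
PyramidsRND is the configuration recorded verbatim in config.json and configuration.yaml at the bottom of this commit, so it is presumably the config this run was trained with. A minimal launch sketch; the --env path and run-id are hypothetical placeholders:

```bash
# Train Pyramids with the RND curiosity reward signal.
# Point --env at your built Pyramids executable; --no-graphics speeds up headless runs.
mlagents-learn ./PyramidsRND.yaml --env=./envs/Pyramids/Pyramids --run-id=Pyramids1 --no-graphics
```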
README.md
ADDED
````markdown
---
library_name: ml-agents
tags:
- Pyramids
- deep-reinforcement-learning
- reinforcement-learning
- ML-Agents-Pyramids
---

# **ppo** Agent playing **Pyramids**
This is a trained model of a **ppo** agent playing **Pyramids**
using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).

## Usage (with ML-Agents)
The Documentation: https://unity-technologies.github.io/ml-agents/ML-Agents-Toolkit-Documentation/

We wrote a complete tutorial to learn to train your first agent using ML-Agents and publish it to the Hub:
- A *short tutorial* where you teach Huggy the Dog 🐶 to fetch the stick and then play with him directly in your browser: https://huggingface.co/learn/deep-rl-course/unitbonus1/introduction
- A *longer tutorial* to understand how ML-Agents works: https://huggingface.co/learn/deep-rl-course/unit5/introduction

### Resume the training
```bash
mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
```

### Watch your Agent play
You can watch your agent **playing directly in your browser**:

1. If the environment is part of ML-Agents official environments, go to https://huggingface.co/unity
2. Find your model_id: loweegee/ppo-pyramids
3. Select your *.nn /*.onnx file
4. Click on Watch the agent play 👀
````
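
Since this commit is the "First Push" of loweegee/ppo-pyramids, the upload itself was presumably done with the push command from the Hugging Face fork of ML-Agents used in the Deep RL Course; a sketch under that assumption, with a hypothetical run-id and results path:

```bash
# Upload the trained run (model, configs, README) to the Hub.
mlagents-push-to-hf \
  --run-id="Pyramids1" \
  --local-dir="./results/Pyramids1" \
  --repo-id="loweegee/ppo-pyramids" \
  --commit-message="First Push"
```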
SnowballTarget.yaml
ADDED
```yaml
behaviors:
  SnowballTarget:
    trainer_type: ppo
    summary_freq: 10000
    keep_checkpoints: 10
    checkpoint_interval: 50000
    max_steps: 200000
    time_horizon: 64
    threaded: false
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: linear
      batch_size: 128
      buffer_size: 2048
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
```
Sorter_curriculum.yaml
ADDED
```yaml
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: False
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 256
    summary_freq: 10000
environment_parameters:
  num_tiles:
    curriculum:
      - name: Lesson0 # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 2.0
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 4.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value: 8.0
      - name: Lesson4
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.55
        value: 10.0
      - name: Lesson5
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.6
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.65
        value: 14.0
      - name: Lesson7
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.7
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.75
        value: 18.0
      - name: Lesson9
        value: 20.0
```
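
In these curriculum entries, measure: progress ties the completion criterion to the fraction of max_steps consumed: once 30% of training has elapsed (threshold: 0.3) and at least min_lesson_length episodes have finished, Lesson0 hands off to Lesson1 and num_tiles jumps from 2 to 4, and so on; the final lesson carries no criterion and simply holds its value for the rest of the run.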
Visual3DBall.yaml
ADDED
```yaml
behaviors:
  Visual3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 256
      buffer_size: 2560
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 400000
    time_horizon: 64
    summary_freq: 20000
```
VisualFoodCollector.yaml
ADDED
```yaml
behaviors:
  VisualFoodCollector:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 3000000
    time_horizon: 100
    summary_freq: 40000
```
Walker.yaml
ADDED
```yaml
behaviors:
  Walker:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2048
      buffer_size: 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 256
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 30000000
    time_horizon: 1000
    summary_freq: 30000
```
WallJump.yaml
ADDED
```yaml
behaviors:
  BigWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 128
    summary_freq: 20000
  SmallWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 128
    summary_freq: 20000
```
WallJump_curriculum.yaml
ADDED
```yaml
behaviors:
  BigWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 128
    summary_freq: 20000
  SmallWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 128
    summary_freq: 20000
environment_parameters:
  big_wall_height:
    curriculum:
      - name: Lesson0 # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: BigWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 0.0
            max_value: 4.0
      - name: Lesson1 # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: BigWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 4.0
            max_value: 7.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: BigWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 6.0
            max_value: 8.0
      - name: Lesson3
        value: 8.0
  small_wall_height:
    curriculum:
      - name: Lesson0
        completion_criteria:
          measure: progress
          behavior: SmallWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value: 1.5
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: SmallWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 2.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: SmallWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value: 2.5
      - name: Lesson3
        value: 4.0
```
Worm.yaml
ADDED
```yaml
behaviors:
  Worm:
    trainer_type: ppo
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 7000000
    time_horizon: 1000
    summary_freq: 30000
```
config.json
ADDED
```json
{"behaviors": {"Pyramids": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 128, "buffer_size": 2048, "learning_rate": 0.0003, "beta": 0.01, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "linear"}, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple"}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0}, "rnd": {"gamma": 0.99, "strength": 0.01, "network_settings": {"hidden_units": 64, "num_layers": 3}, "learning_rate": 0.0001}}, "keep_checkpoints": 5, "max_steps": 1000000, "time_horizon": 128, "summary_freq": 30000}}}
```
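
This is the same Pyramids run configuration as configuration.yaml below, serialized to a single line of JSON. Any JSON pretty-printer makes it readable; for example, with the Python standard library:

```bash
# Pretty-print the one-line run config.
python -m json.tool config.json
```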
configuration.yaml
ADDED
```yaml
behaviors:
  Pyramids:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 512
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
      rnd:
        gamma: 0.99
        strength: 0.01
        network_settings:
          hidden_units: 64
          num_layers: 3
        learning_rate: 0.0001
    keep_checkpoints: 5
    max_steps: 1000000
    time_horizon: 128
    summary_freq: 30000
```