loweegee committed on
Commit
067fcc6
·
verified ·
1 Parent(s): fd3726e

First Push

Browse files
3DBall.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ 3DBall:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 64
6
+ buffer_size: 12000
7
+ learning_rate: 0.0003
8
+ beta: 0.001
9
+ epsilon: 0.2
10
+ lambd: 0.99
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: true
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 500000
24
+ time_horizon: 1000
25
+ summary_freq: 12000
3DBallHard.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ 3DBallHard:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 120
6
+ buffer_size: 12000
7
+ learning_rate: 0.0003
8
+ beta: 0.001
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: true
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 500000
24
+ time_horizon: 1000
25
+ summary_freq: 12000
3DBall_randomize.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ 3DBall:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 64
6
+ buffer_size: 12000
7
+ learning_rate: 0.0003
8
+ beta: 0.001
9
+ epsilon: 0.2
10
+ lambd: 0.99
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: true
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 500000
24
+ time_horizon: 1000
25
+ summary_freq: 12000
26
+ environment_parameters:
27
+ mass:
28
+ sampler_type: uniform
29
+ sampler_parameters:
30
+ min_value: 0.5
31
+ max_value: 10
32
+ scale:
33
+ sampler_type: uniform
34
+ sampler_parameters:
35
+ min_value: 0.75
36
+ max_value: 3
Basic.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Basic:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 32
6
+ buffer_size: 256
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 20
16
+ num_layers: 1
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.9
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 500000
24
+ time_horizon: 3
25
+ summary_freq: 2000
Crawler.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Crawler:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 2048
6
+ buffer_size: 20480
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: true
15
+ hidden_units: 512
16
+ num_layers: 3
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.995
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 10000000
24
+ time_horizon: 1000
25
+ summary_freq: 30000
FoodCollector.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ GridFoodCollector:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 1024
6
+ buffer_size: 10240
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 256
16
+ num_layers: 1
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 2000000
24
+ time_horizon: 64
25
+ summary_freq: 10000
GridWorld.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ GridWorld:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 32
6
+ buffer_size: 256
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 128
16
+ num_layers: 1
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.9
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 500000
24
+ time_horizon: 5
25
+ summary_freq: 20000
Hallway.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Hallway:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 128
6
+ buffer_size: 1024
7
+ learning_rate: 0.0003
8
+ beta: 0.03
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ memory:
19
+ sequence_length: 64
20
+ memory_size: 128
21
+ reward_signals:
22
+ extrinsic:
23
+ gamma: 0.99
24
+ strength: 1.0
25
+ keep_checkpoints: 5
26
+ max_steps: 10000000
27
+ time_horizon: 64
28
+ summary_freq: 10000
Match3.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_settings:
2
+ trainer_type: ppo
3
+ hyperparameters:
4
+ batch_size: 16
5
+ buffer_size: 120
6
+ learning_rate: 0.0003
7
+ beta: 0.005
8
+ epsilon: 0.2
9
+ lambd: 0.99
10
+ num_epoch: 3
11
+ learning_rate_schedule: constant
12
+ network_settings:
13
+ normalize: true
14
+ hidden_units: 256
15
+ num_layers: 4
16
+ vis_encode_type: match3
17
+ reward_signals:
18
+ extrinsic:
19
+ gamma: 0.99
20
+ strength: 1.0
21
+ keep_checkpoints: 5
22
+ max_steps: 5000000
23
+ time_horizon: 128
24
+ summary_freq: 10000
25
+
26
+ behaviors:
27
+ Match3SimpleHeuristic:
28
+ # Settings can be very simple since we don't care about actually training the model
29
+ trainer_type: ppo
30
+ hyperparameters:
31
+ batch_size: 16
32
+ buffer_size: 120
33
+ network_settings:
34
+ hidden_units: 4
35
+ num_layers: 1
36
+ max_steps: 5000000
37
+ summary_freq: 10000
38
+ Match3SmartHeuristic:
39
+ # Settings can be very simple since we don't care about actually training the model
40
+ trainer_type: ppo
41
+ hyperparameters:
42
+ batch_size: 16
43
+ buffer_size: 120
44
+ network_settings:
45
+ hidden_units: 4
46
+ num_layers: 1
47
+ max_steps: 5000000
48
+ summary_freq: 10000
PushBlock.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ PushBlock:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 128
6
+ buffer_size: 2048
7
+ learning_rate: 0.0003
8
+ beta: 0.01
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 256
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 2000000
24
+ time_horizon: 64
25
+ summary_freq: 60000
Pyramids.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Pyramids:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 128
6
+ buffer_size: 2048
7
+ learning_rate: 0.0003
8
+ beta: 0.01
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 512
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ curiosity:
23
+ gamma: 0.99
24
+ strength: 0.02
25
+ network_settings:
26
+ hidden_units: 256
27
+ learning_rate: 0.0003
28
+ keep_checkpoints: 5
29
+ max_steps: 10000000
30
+ time_horizon: 128
31
+ summary_freq: 30000
PyramidsRND.yaml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Pyramids:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 128
6
+ buffer_size: 2048
7
+ learning_rate: 0.0003
8
+ beta: 0.01
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 512
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ rnd:
23
+ gamma: 0.99
24
+ strength: 0.01
25
+ network_settings:
26
+ hidden_units: 64
27
+ num_layers: 3
28
+ learning_rate: 0.0001
29
+ keep_checkpoints: 5
30
+ max_steps: 1000000
31
+ time_horizon: 128
32
+ summary_freq: 30000
README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: ml-agents
3
+ tags:
4
+ - Pyramids
5
+ - deep-reinforcement-learning
6
+ - reinforcement-learning
7
+ - ML-Agents-Pyramids
8
+ ---
9
+
10
+ # **ppo** Agent playing **Pyramids**
11
+ This is a trained model of a **ppo** agent playing **Pyramids**
12
+ using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
13
+
14
+ ## Usage (with ML-Agents)
15
+ The Documentation: https://unity-technologies.github.io/ml-agents/ML-Agents-Toolkit-Documentation/
16
+
17
+ We wrote a complete tutorial to learn to train your first agent using ML-Agents and publish it to the Hub:
18
+ - A *short tutorial* where you teach Huggy the Dog 🐶 to fetch the stick and then play with him directly in your
19
+ browser: https://huggingface.co/learn/deep-rl-course/unitbonus1/introduction
20
+ - A *longer tutorial* to understand how ML-Agents works:
21
+ https://huggingface.co/learn/deep-rl-course/unit5/introduction
22
+
23
+ ### Resume the training
24
+ ```bash
25
+ mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
26
+ ```
27
+
28
+ ### Watch your Agent play
29
+ You can watch your agent **playing directly in your browser**
30
+
31
+ 1. If the environment is part of ML-Agents official environments, go to https://huggingface.co/unity
32
+ 2. Find your model_id: loweegee/ppo-pyramids
33
+ 3. Select your *.nn / *.onnx file
34
+ 4. Click on Watch the agent play 👀
35
+
SnowballTarget.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ behaviors:
3
+ SnowballTarget:
4
+ trainer_type: ppo
5
+ summary_freq: 10000
6
+ keep_checkpoints: 10
7
+ checkpoint_interval: 50000
8
+ max_steps: 200000
9
+ time_horizon: 64
10
+ threaded: false
11
+ hyperparameters:
12
+ learning_rate: 0.0003
13
+ learning_rate_schedule: linear
14
+ batch_size: 128
15
+ buffer_size: 2048
16
+ beta: 0.005
17
+ epsilon: 0.2
18
+ lambd: 0.95
19
+ num_epoch: 3
20
+ network_settings:
21
+ normalize: false
22
+ hidden_units: 256
23
+ num_layers: 2
24
+ vis_encode_type: simple
25
+ reward_signals:
26
+ extrinsic:
27
+ gamma: 0.99
28
+ strength: 1.0
Sorter_curriculum.yaml ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Sorter:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 512
6
+ buffer_size: 40960
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: constant
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 5000000
24
+ time_horizon: 256
25
+ summary_freq: 10000
26
+ environment_parameters:
27
+ num_tiles:
28
+ curriculum:
29
+ - name: Lesson0 # The '-' is important as this is a list
30
+ completion_criteria:
31
+ measure: progress
32
+ behavior: Sorter
33
+ signal_smoothing: true
34
+ min_lesson_length: 100
35
+ threshold: 0.3
36
+ value: 2.0
37
+ - name: Lesson1
38
+ completion_criteria:
39
+ measure: progress
40
+ behavior: Sorter
41
+ signal_smoothing: true
42
+ min_lesson_length: 100
43
+ threshold: 0.4
44
+ value: 4.0
45
+ - name: Lesson2
46
+ completion_criteria:
47
+ measure: progress
48
+ behavior: Sorter
49
+ signal_smoothing: true
50
+ min_lesson_length: 100
51
+ threshold: 0.45
52
+ value: 6.0
53
+ - name: Lesson3
54
+ completion_criteria:
55
+ measure: progress
56
+ behavior: Sorter
57
+ signal_smoothing: true
58
+ min_lesson_length: 100
59
+ threshold: 0.5
60
+ value: 8.0
61
+ - name: Lesson4
62
+ completion_criteria:
63
+ measure: progress
64
+ behavior: Sorter
65
+ signal_smoothing: true
66
+ min_lesson_length: 100
67
+ threshold: 0.55
68
+ value: 10.0
69
+ - name: Lesson5
70
+ completion_criteria:
71
+ measure: progress
72
+ behavior: Sorter
73
+ signal_smoothing: true
74
+ min_lesson_length: 100
75
+ threshold: 0.6
76
+ value: 12.0
77
+ - name: Lesson6
78
+ completion_criteria:
79
+ measure: progress
80
+ behavior: Sorter
81
+ signal_smoothing: true
82
+ min_lesson_length: 100
83
+ threshold: 0.65
84
+ value: 14.0
85
+ - name: Lesson7
86
+ completion_criteria:
87
+ measure: progress
88
+ behavior: Sorter
89
+ signal_smoothing: true
90
+ min_lesson_length: 100
91
+ threshold: 0.7
92
+ value: 16.0
93
+ - name: Lesson8
94
+ completion_criteria:
95
+ measure: progress
96
+ behavior: Sorter
97
+ signal_smoothing: true
98
+ min_lesson_length: 100
99
+ threshold: 0.75
100
+ value: 18.0
101
+ - name: Lesson9
102
+ value: 20.0
Visual3DBall.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Visual3DBall:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 256
6
+ buffer_size: 2560
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 400000
24
+ time_horizon: 64
25
+ summary_freq: 20000
VisualFoodCollector.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ VisualFoodCollector:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 1024
6
+ buffer_size: 10240
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 128
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 3000000
24
+ time_horizon: 100
25
+ summary_freq: 40000
Walker.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Walker:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 2048
6
+ buffer_size: 20480
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: true
15
+ hidden_units: 256
16
+ num_layers: 3
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.995
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 30000000
24
+ time_horizon: 1000
25
+ summary_freq: 30000
WallJump.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ BigWallJump:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 128
6
+ buffer_size: 2048
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 256
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 20000000
24
+ time_horizon: 128
25
+ summary_freq: 20000
26
+ SmallWallJump:
27
+ trainer_type: ppo
28
+ hyperparameters:
29
+ batch_size: 128
30
+ buffer_size: 2048
31
+ learning_rate: 0.0003
32
+ beta: 0.005
33
+ epsilon: 0.2
34
+ lambd: 0.95
35
+ num_epoch: 3
36
+ learning_rate_schedule: linear
37
+ network_settings:
38
+ normalize: false
39
+ hidden_units: 256
40
+ num_layers: 2
41
+ vis_encode_type: simple
42
+ reward_signals:
43
+ extrinsic:
44
+ gamma: 0.99
45
+ strength: 1.0
46
+ keep_checkpoints: 5
47
+ max_steps: 5000000
48
+ time_horizon: 128
49
+ summary_freq: 20000
WallJump_curriculum.yaml ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ BigWallJump:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 128
6
+ buffer_size: 2048
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: false
15
+ hidden_units: 256
16
+ num_layers: 2
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.99
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 20000000
24
+ time_horizon: 128
25
+ summary_freq: 20000
26
+ SmallWallJump:
27
+ trainer_type: ppo
28
+ hyperparameters:
29
+ batch_size: 128
30
+ buffer_size: 2048
31
+ learning_rate: 0.0003
32
+ beta: 0.005
33
+ epsilon: 0.2
34
+ lambd: 0.95
35
+ num_epoch: 3
36
+ learning_rate_schedule: linear
37
+ network_settings:
38
+ normalize: false
39
+ hidden_units: 256
40
+ num_layers: 2
41
+ vis_encode_type: simple
42
+ reward_signals:
43
+ extrinsic:
44
+ gamma: 0.99
45
+ strength: 1.0
46
+ keep_checkpoints: 5
47
+ max_steps: 5000000
48
+ time_horizon: 128
49
+ summary_freq: 20000
50
+ environment_parameters:
51
+ big_wall_height:
52
+ curriculum:
53
+ - name: Lesson0 # The '-' is important as this is a list
54
+ completion_criteria:
55
+ measure: progress
56
+ behavior: BigWallJump
57
+ signal_smoothing: true
58
+ min_lesson_length: 100
59
+ threshold: 0.1
60
+ value:
61
+ sampler_type: uniform
62
+ sampler_parameters:
63
+ min_value: 0.0
64
+ max_value: 4.0
65
+ - name: Lesson1 # This is the start of the second lesson
66
+ completion_criteria:
67
+ measure: progress
68
+ behavior: BigWallJump
69
+ signal_smoothing: true
70
+ min_lesson_length: 100
71
+ threshold: 0.3
72
+ value:
73
+ sampler_type: uniform
74
+ sampler_parameters:
75
+ min_value: 4.0
76
+ max_value: 7.0
77
+ - name: Lesson2
78
+ completion_criteria:
79
+ measure: progress
80
+ behavior: BigWallJump
81
+ signal_smoothing: true
82
+ min_lesson_length: 100
83
+ threshold: 0.5
84
+ value:
85
+ sampler_type: uniform
86
+ sampler_parameters:
87
+ min_value: 6.0
88
+ max_value: 8.0
89
+ - name: Lesson3
90
+ value: 8.0
91
+ small_wall_height:
92
+ curriculum:
93
+ - name: Lesson0
94
+ completion_criteria:
95
+ measure: progress
96
+ behavior: SmallWallJump
97
+ signal_smoothing: true
98
+ min_lesson_length: 100
99
+ threshold: 0.1
100
+ value: 1.5
101
+ - name: Lesson1
102
+ completion_criteria:
103
+ measure: progress
104
+ behavior: SmallWallJump
105
+ signal_smoothing: true
106
+ min_lesson_length: 100
107
+ threshold: 0.3
108
+ value: 2.0
109
+ - name: Lesson2
110
+ completion_criteria:
111
+ measure: progress
112
+ behavior: SmallWallJump
113
+ signal_smoothing: true
114
+ min_lesson_length: 100
115
+ threshold: 0.5
116
+ value: 2.5
117
+ - name: Lesson3
118
+ value: 4.0
Worm.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ behaviors:
2
+ Worm:
3
+ trainer_type: ppo
4
+ hyperparameters:
5
+ batch_size: 2024
6
+ buffer_size: 20240
7
+ learning_rate: 0.0003
8
+ beta: 0.005
9
+ epsilon: 0.2
10
+ lambd: 0.95
11
+ num_epoch: 3
12
+ learning_rate_schedule: linear
13
+ network_settings:
14
+ normalize: true
15
+ hidden_units: 512
16
+ num_layers: 3
17
+ vis_encode_type: simple
18
+ reward_signals:
19
+ extrinsic:
20
+ gamma: 0.995
21
+ strength: 1.0
22
+ keep_checkpoints: 5
23
+ max_steps: 7000000
24
+ time_horizon: 1000
25
+ summary_freq: 30000
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"behaviors": {"Pyramids": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 128, "buffer_size": 2048, "learning_rate": 0.0003, "beta": 0.01, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "linear"}, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple"}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0}, "rnd": {"gamma": 0.99, "strength": 0.01, "network_settings": {"hidden_units": 64, "num_layers": 3}, "learning_rate": 0.0001}}, "keep_checkpoints": 5, "max_steps": 1000000, "time_horizon": 128, "summary_freq": 30000}}}
configuration.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ behaviors:
3
+ Pyramids:
4
+ trainer_type: ppo
5
+ hyperparameters:
6
+ batch_size: 128
7
+ buffer_size: 2048
8
+ learning_rate: 0.0003
9
+ beta: 0.01
10
+ epsilon: 0.2
11
+ lambd: 0.95
12
+ num_epoch: 3
13
+ learning_rate_schedule: linear
14
+ network_settings:
15
+ normalize: false
16
+ hidden_units: 512
17
+ num_layers: 2
18
+ vis_encode_type: simple
19
+ reward_signals:
20
+ extrinsic:
21
+ gamma: 0.99
22
+ strength: 1.0
23
+ rnd:
24
+ gamma: 0.99
25
+ strength: 0.01
26
+ network_settings:
27
+ hidden_units: 64
28
+ num_layers: 3
29
+ learning_rate: 0.0001
30
+ keep_checkpoints: 5
31
+ max_steps: 1000000
32
+ time_horizon: 128
33
+ summary_freq: 30000