azaazato committed on
Commit
2979312
·
verified ·
1 Parent(s): e992c67
README.md CHANGED
@@ -1,31 +1,35 @@
1
-
2
  ---
3
- tags:
4
- - unity-ml-agents
5
- - ml-agents
6
- - deep-reinforcement-learning
7
- - reinforcement-learning
8
- - ML-Agents-Huggy
9
- library_name: ml-agents
10
  ---
11
-
12
  # **ppo** Agent playing **Huggy**
13
- This is a trained model of a **ppo** agent playing **Huggy** using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
14
-
 
15
  ## Usage (with ML-Agents)
16
- The Documentation: https://github.com/huggingface/ml-agents#get-started
17
- We wrote a complete tutorial to learn to train your first agent using ML-Agents and publish it to the Hub:
18
 
 
 
 
 
 
19
 
20
  ### Resume the training
21
- ```
22
  mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
23
  ```
 
24
  ### Watch your Agent play
25
- You can watch your agent **playing directly in your browser:**.
26
-
27
- 1. Go to https://huggingface.co/spaces/unity/ML-Agents-Huggy
28
- 2. Step 1: Write your model_id: azaazato/ppo-Huggy
29
  3. Step 2: Select your *.nn /*.onnx file
30
  4. Click on Watch the agent play 👀
31
 
 
 
1
  ---
2
+ library_name: ml-agents
3
+ tags:
4
+ - Huggy
5
+ - deep-reinforcement-learning
6
+ - reinforcement-learning
7
+ - ML-Agents-Huggy
 
8
  ---
9
+
10
  # **ppo** Agent playing **Huggy**
11
+ This is a trained model of a **ppo** agent playing **Huggy**
12
+ using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
13
+
14
  ## Usage (with ML-Agents)
15
+ The Documentation: https://unity-technologies.github.io/ml-agents/ML-Agents-Toolkit-Documentation/
 
16
 
17
+ We wrote a complete tutorial to learn to train your first agent using ML-Agents and publish it to the Hub:
18
+ - A *short tutorial* where you teach Huggy the Dog 🐶 to fetch the stick and then play with him directly in your
19
+ browser: https://huggingface.co/learn/deep-rl-course/unitbonus1/introduction
20
+ - A *longer tutorial* to understand how ML-Agents works:
21
+ https://huggingface.co/learn/deep-rl-course/unit5/introduction
22
 
23
  ### Resume the training
24
+ ```bash
25
  mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
26
  ```
27
+
28
  ### Watch your Agent play
29
+ You can watch your agent **playing directly in your browser**:
30
+
31
+ 1. If the environment is part of ML-Agents official environments, go to https://huggingface.co/unity
32
+ 2. Step 1: Find your model_id: azaazato/ppo-Huggy
33
  3. Step 2: Select your *.nn /*.onnx file
34
  4. Click on Watch the agent play 👀
35
 
config.json CHANGED
@@ -1 +1 @@
1
- {"default_settings": null, "behaviors": {"Huggy": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "network_settings": {"normalize": true, "hidden_units": 512, "num_layers": 3, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.995, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 15, "checkpoint_interval": 200000, "max_steps": 2000000, "time_horizon": 1000, "summary_freq": 50000, "threaded": false, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": "./trained-envs-executables/linux/Huggy/Huggy", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true}, "environment_parameters": null, "checkpoint_settings": {"run_id": "Huggy", "initialize_from": null, "load_model": false, "resume": false, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
 
1
+ {"default_settings": null, "behaviors": {"Huggy": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 2048, "buffer_size": 20480, "learning_rate": 0.0003, "beta": 0.005, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 200000, "network_settings": {"normalize": true, "hidden_units": 512, "num_layers": 3, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.995, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 15, "even_checkpoints": false, "max_steps": 2000000, "time_horizon": 1000, "summary_freq": 50000, "threaded": false, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": "./trained-envs-executables/linux/Huggy/Huggy", "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": true, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "Huggy2", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
configuration.yaml CHANGED
@@ -10,9 +10,11 @@ behaviors:
10
  epsilon: 0.2
11
  lambd: 0.95
12
  num_epoch: 3
 
13
  learning_rate_schedule: linear
14
  beta_schedule: linear
15
  epsilon_schedule: linear
 
16
  network_settings:
17
  normalize: true
18
  hidden_units: 512
@@ -35,7 +37,7 @@ behaviors:
35
  deterministic: false
36
  init_path: null
37
  keep_checkpoints: 15
38
- checkpoint_interval: 200000
39
  max_steps: 2000000
40
  time_horizon: 1000
41
  summary_freq: 50000
@@ -48,6 +50,7 @@ env_settings:
48
  base_port: 5005
49
  num_envs: 1
50
  num_areas: 1
 
51
  seed: -1
52
  max_lifetime_restarts: 10
53
  restarts_rate_limit_n: 1
@@ -60,13 +63,14 @@ engine_settings:
60
  target_frame_rate: -1
61
  capture_frame_rate: 60
62
  no_graphics: true
 
63
  environment_parameters: null
64
  checkpoint_settings:
65
- run_id: Huggy
66
  initialize_from: null
67
  load_model: false
68
  resume: false
69
- force: false
70
  train_model: false
71
  inference: false
72
  results_dir: results
 
10
  epsilon: 0.2
11
  lambd: 0.95
12
  num_epoch: 3
13
+ shared_critic: false
14
  learning_rate_schedule: linear
15
  beta_schedule: linear
16
  epsilon_schedule: linear
17
+ checkpoint_interval: 200000
18
  network_settings:
19
  normalize: true
20
  hidden_units: 512
 
37
  deterministic: false
38
  init_path: null
39
  keep_checkpoints: 15
40
+ even_checkpoints: false
41
  max_steps: 2000000
42
  time_horizon: 1000
43
  summary_freq: 50000
 
50
  base_port: 5005
51
  num_envs: 1
52
  num_areas: 1
53
+ timeout_wait: 60
54
  seed: -1
55
  max_lifetime_restarts: 10
56
  restarts_rate_limit_n: 1
 
63
  target_frame_rate: -1
64
  capture_frame_rate: 60
65
  no_graphics: true
66
+ no_graphics_monitor: false
67
  environment_parameters: null
68
  checkpoint_settings:
69
+ run_id: Huggy2
70
  initialize_from: null
71
  load_model: false
72
  resume: false
73
+ force: true
74
  train_model: false
75
  inference: false
76
  results_dir: results
run_logs/timers.json CHANGED
@@ -1,367 +1,45 @@
1
  {
2
  "name": "root",
3
- "gauges": {
4
- "Huggy.Policy.Entropy.mean": {
5
- "value": 1.4060659408569336,
6
- "min": 1.4060659408569336,
7
- "max": 1.4315779209136963,
8
- "count": 40
9
- },
10
- "Huggy.Policy.Entropy.sum": {
11
- "value": 70076.921875,
12
- "min": 68163.3125,
13
- "max": 78823.0625,
14
- "count": 40
15
- },
16
- "Huggy.Environment.EpisodeLength.mean": {
17
- "value": 95.30057803468208,
18
- "min": 84.09013605442176,
19
- "max": 380.91603053435114,
20
- "count": 40
21
- },
22
- "Huggy.Environment.EpisodeLength.sum": {
23
- "value": 49461.0,
24
- "min": 48870.0,
25
- "max": 50034.0,
26
- "count": 40
27
- },
28
- "Huggy.Step.mean": {
29
- "value": 1999905.0,
30
- "min": 49810.0,
31
- "max": 1999905.0,
32
- "count": 40
33
- },
34
- "Huggy.Step.sum": {
35
- "value": 1999905.0,
36
- "min": 49810.0,
37
- "max": 1999905.0,
38
- "count": 40
39
- },
40
- "Huggy.Policy.ExtrinsicValueEstimate.mean": {
41
- "value": 2.411778450012207,
42
- "min": 0.06771308928728104,
43
- "max": 2.4655308723449707,
44
- "count": 40
45
- },
46
- "Huggy.Policy.ExtrinsicValueEstimate.sum": {
47
- "value": 1251.7130126953125,
48
- "min": 8.802701950073242,
49
- "max": 1396.031005859375,
50
- "count": 40
51
- },
52
- "Huggy.Environment.CumulativeReward.mean": {
53
- "value": 3.7844826048509232,
54
- "min": 1.8963831330721195,
55
- "max": 3.932653166745838,
56
- "count": 40
57
- },
58
- "Huggy.Environment.CumulativeReward.sum": {
59
- "value": 1964.1464719176292,
60
- "min": 246.52980729937553,
61
- "max": 2218.038535296917,
62
- "count": 40
63
- },
64
- "Huggy.Policy.ExtrinsicReward.mean": {
65
- "value": 3.7844826048509232,
66
- "min": 1.8963831330721195,
67
- "max": 3.932653166745838,
68
- "count": 40
69
- },
70
- "Huggy.Policy.ExtrinsicReward.sum": {
71
- "value": 1964.1464719176292,
72
- "min": 246.52980729937553,
73
- "max": 2218.038535296917,
74
- "count": 40
75
- },
76
- "Huggy.Losses.PolicyLoss.mean": {
77
- "value": 0.016120348759351248,
78
- "min": 0.014345712727905871,
79
- "max": 0.019013986076849204,
80
- "count": 40
81
- },
82
- "Huggy.Losses.PolicyLoss.sum": {
83
- "value": 0.048361046278053745,
84
- "min": 0.03087165418449634,
85
- "max": 0.05704195823054761,
86
- "count": 40
87
- },
88
- "Huggy.Losses.ValueLoss.mean": {
89
- "value": 0.05144879606862863,
90
- "min": 0.020817248274882634,
91
- "max": 0.05970884294559558,
92
- "count": 40
93
- },
94
- "Huggy.Losses.ValueLoss.sum": {
95
- "value": 0.1543463882058859,
96
- "min": 0.04163449654976527,
97
- "max": 0.173665156836311,
98
- "count": 40
99
- },
100
- "Huggy.Policy.LearningRate.mean": {
101
- "value": 3.4034988655333344e-06,
102
- "min": 3.4034988655333344e-06,
103
- "max": 0.00029531017656327487,
104
- "count": 40
105
- },
106
- "Huggy.Policy.LearningRate.sum": {
107
- "value": 1.0210496596600003e-05,
108
- "min": 1.0210496596600003e-05,
109
- "max": 0.0008441998686000501,
110
- "count": 40
111
- },
112
- "Huggy.Policy.Epsilon.mean": {
113
- "value": 0.10113446666666669,
114
- "min": 0.10113446666666669,
115
- "max": 0.198436725,
116
- "count": 40
117
- },
118
- "Huggy.Policy.Epsilon.sum": {
119
- "value": 0.30340340000000005,
120
- "min": 0.20742860000000005,
121
- "max": 0.58139995,
122
- "count": 40
123
- },
124
- "Huggy.Policy.Beta.mean": {
125
- "value": 6.66098866666667e-05,
126
- "min": 6.66098866666667e-05,
127
- "max": 0.004921992577500001,
128
- "count": 40
129
- },
130
- "Huggy.Policy.Beta.sum": {
131
- "value": 0.00019982966000000007,
132
- "min": 0.00019982966000000007,
133
- "max": 0.014071857505000004,
134
- "count": 40
135
- },
136
- "Huggy.IsTraining.mean": {
137
- "value": 1.0,
138
- "min": 1.0,
139
- "max": 1.0,
140
- "count": 40
141
- },
142
- "Huggy.IsTraining.sum": {
143
- "value": 1.0,
144
- "min": 1.0,
145
- "max": 1.0,
146
- "count": 40
147
- }
148
- },
149
  "metadata": {
150
  "timer_format_version": "0.1.0",
151
- "start_time_seconds": "1675774841",
152
- "python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
153
- "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics",
154
- "mlagents_version": "0.29.0.dev0",
155
- "mlagents_envs_version": "0.29.0.dev0",
156
  "communication_protocol_version": "1.5.0",
157
- "pytorch_version": "1.8.1+cu102",
158
- "numpy_version": "1.21.6",
159
- "end_time_seconds": "1675777341"
160
  },
161
- "total": 2499.553229916,
162
  "count": 1,
163
- "self": 0.38974296400010644,
164
  "children": {
165
  "run_training.setup": {
166
- "total": 0.12540149000000156,
167
  "count": 1,
168
- "self": 0.12540149000000156
169
  },
170
  "TrainerController.start_learning": {
171
- "total": 2499.038085462,
172
  "count": 1,
173
- "self": 4.459906200040223,
174
  "children": {
175
  "TrainerController._reset_env": {
176
- "total": 11.430268882999997,
177
  "count": 1,
178
- "self": 11.430268882999997
179
- },
180
- "TrainerController.advance": {
181
- "total": 2483.02428767096,
182
- "count": 231887,
183
- "self": 4.80020010498356,
184
- "children": {
185
- "env_step": {
186
- "total": 1936.3103898659372,
187
- "count": 231887,
188
- "self": 1612.945572966961,
189
- "children": {
190
- "SubprocessEnvManager._take_step": {
191
- "total": 320.3758816540504,
192
- "count": 231887,
193
- "self": 16.48313822310297,
194
- "children": {
195
- "TorchPolicy.evaluate": {
196
- "total": 303.8927434309474,
197
- "count": 223039,
198
- "self": 76.1059216419576,
199
- "children": {
200
- "TorchPolicy.sample_actions": {
201
- "total": 227.7868217889898,
202
- "count": 223039,
203
- "self": 227.7868217889898
204
- }
205
- }
206
- }
207
- }
208
- },
209
- "workers": {
210
- "total": 2.9889352449258126,
211
- "count": 231887,
212
- "self": 0.0,
213
- "children": {
214
- "worker_root": {
215
- "total": 2490.4996110509,
216
- "count": 231887,
217
- "is_parallel": true,
218
- "self": 1181.961858940976,
219
- "children": {
220
- "run_training.setup": {
221
- "total": 0.0,
222
- "count": 0,
223
- "is_parallel": true,
224
- "self": 0.0,
225
- "children": {
226
- "steps_from_proto": {
227
- "total": 0.0035053580000408147,
228
- "count": 1,
229
- "is_parallel": true,
230
- "self": 0.0003519030000802559,
231
- "children": {
232
- "_process_rank_one_or_two_observation": {
233
- "total": 0.0031534549999605588,
234
- "count": 2,
235
- "is_parallel": true,
236
- "self": 0.0031534549999605588
237
- }
238
- }
239
- },
240
- "UnityEnvironment.step": {
241
- "total": 0.06337132800001655,
242
- "count": 1,
243
- "is_parallel": true,
244
- "self": 0.000358854000012343,
245
- "children": {
246
- "UnityEnvironment._generate_step_input": {
247
- "total": 0.00021737800000209972,
248
- "count": 1,
249
- "is_parallel": true,
250
- "self": 0.00021737800000209972
251
- },
252
- "communicator.exchange": {
253
- "total": 0.062010756000006495,
254
- "count": 1,
255
- "is_parallel": true,
256
- "self": 0.062010756000006495
257
- },
258
- "steps_from_proto": {
259
- "total": 0.0007843399999956091,
260
- "count": 1,
261
- "is_parallel": true,
262
- "self": 0.00028456799992682136,
263
- "children": {
264
- "_process_rank_one_or_two_observation": {
265
- "total": 0.0004997720000687877,
266
- "count": 2,
267
- "is_parallel": true,
268
- "self": 0.0004997720000687877
269
- }
270
- }
271
- }
272
- }
273
- }
274
- }
275
- },
276
- "UnityEnvironment.step": {
277
- "total": 1308.537752109924,
278
- "count": 231886,
279
- "is_parallel": true,
280
- "self": 38.942038724876056,
281
- "children": {
282
- "UnityEnvironment._generate_step_input": {
283
- "total": 82.73317857093588,
284
- "count": 231886,
285
- "is_parallel": true,
286
- "self": 82.73317857093588
287
- },
288
- "communicator.exchange": {
289
- "total": 1090.6349302629594,
290
- "count": 231886,
291
- "is_parallel": true,
292
- "self": 1090.6349302629594
293
- },
294
- "steps_from_proto": {
295
- "total": 96.22760455115247,
296
- "count": 231886,
297
- "is_parallel": true,
298
- "self": 41.294710258310715,
299
- "children": {
300
- "_process_rank_one_or_two_observation": {
301
- "total": 54.93289429284175,
302
- "count": 463772,
303
- "is_parallel": true,
304
- "self": 54.93289429284175
305
- }
306
- }
307
- }
308
- }
309
- }
310
- }
311
- }
312
- }
313
- }
314
- }
315
- },
316
- "trainer_advance": {
317
- "total": 541.9136977000394,
318
- "count": 231887,
319
- "self": 6.891252809051366,
320
- "children": {
321
- "process_trajectory": {
322
- "total": 168.41609711199072,
323
- "count": 231887,
324
- "self": 167.1537047229911,
325
- "children": {
326
- "RLTrainer._checkpoint": {
327
- "total": 1.262392388999615,
328
- "count": 10,
329
- "self": 1.262392388999615
330
- }
331
- }
332
- },
333
- "_update_policy": {
334
- "total": 366.6063477789973,
335
- "count": 97,
336
- "self": 308.5765702070148,
337
- "children": {
338
- "TorchPPOOptimizer.update": {
339
- "total": 58.02977757198249,
340
- "count": 2910,
341
- "self": 58.02977757198249
342
- }
343
- }
344
- }
345
- }
346
- }
347
- }
348
  },
349
  "trainer_threads": {
350
- "total": 8.279998837679159e-07,
351
  "count": 1,
352
- "self": 8.279998837679159e-07
353
  },
354
  "TrainerController._save_models": {
355
- "total": 0.12362187999997332,
356
  "count": 1,
357
- "self": 0.002087185999698704,
358
- "children": {
359
- "RLTrainer._checkpoint": {
360
- "total": 0.12153469400027461,
361
- "count": 1,
362
- "self": 0.12153469400027461
363
- }
364
- }
365
  }
366
  }
367
  }
 
1
  {
2
  "name": "root",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "metadata": {
4
  "timer_format_version": "0.1.0",
5
+ "start_time_seconds": "1762173766",
6
+ "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
7
+ "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy2 --no-graphics --force",
8
+ "mlagents_version": "1.2.0.dev0",
9
+ "mlagents_envs_version": "1.2.0.dev0",
10
  "communication_protocol_version": "1.5.0",
11
+ "pytorch_version": "2.8.0+cu128",
12
+ "numpy_version": "1.23.5",
13
+ "end_time_seconds": "1762173766"
14
  },
15
+ "total": 0.04546879700001227,
16
  "count": 1,
17
+ "self": 0.007940021000194974,
18
  "children": {
19
  "run_training.setup": {
20
+ "total": 0.023404353999922023,
21
  "count": 1,
22
+ "self": 0.023404353999922023
23
  },
24
  "TrainerController.start_learning": {
25
+ "total": 0.014124421999895276,
26
  "count": 1,
27
+ "self": 0.0003588899999158457,
28
  "children": {
29
  "TrainerController._reset_env": {
30
+ "total": 0.013742294000167021,
31
  "count": 1,
32
+ "self": 0.013742294000167021
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  "trainer_threads": {
35
+ "total": 1.4979996194597334e-06,
36
  "count": 1,
37
+ "self": 1.4979996194597334e-06
38
  },
39
  "TrainerController._save_models": {
40
+ "total": 2.1740000192949083e-05,
41
  "count": 1,
42
+ "self": 2.1740000192949083e-05
 
 
 
 
 
 
 
43
  }
44
  }
45
  }
run_logs/training_status.json CHANGED
@@ -1,119 +1,7 @@
1
  {
2
- "Huggy": {
3
- "checkpoints": [
4
- {
5
- "steps": 199966,
6
- "file_path": "results/Huggy/Huggy/Huggy-199966.onnx",
7
- "reward": 3.425651775797208,
8
- "creation_time": 1675775098.1742382,
9
- "auxillary_file_paths": [
10
- "results/Huggy/Huggy/Huggy-199966.pt"
11
- ]
12
- },
13
- {
14
- "steps": 399948,
15
- "file_path": "results/Huggy/Huggy/Huggy-399948.onnx",
16
- "reward": 3.8748908042907715,
17
- "creation_time": 1675775352.17324,
18
- "auxillary_file_paths": [
19
- "results/Huggy/Huggy/Huggy-399948.pt"
20
- ]
21
- },
22
- {
23
- "steps": 599968,
24
- "file_path": "results/Huggy/Huggy/Huggy-599968.onnx",
25
- "reward": 3.5677208367146944,
26
- "creation_time": 1675775606.0478587,
27
- "auxillary_file_paths": [
28
- "results/Huggy/Huggy/Huggy-599968.pt"
29
- ]
30
- },
31
- {
32
- "steps": 799998,
33
- "file_path": "results/Huggy/Huggy/Huggy-799998.onnx",
34
- "reward": 3.773281857992212,
35
- "creation_time": 1675775858.7409933,
36
- "auxillary_file_paths": [
37
- "results/Huggy/Huggy/Huggy-799998.pt"
38
- ]
39
- },
40
- {
41
- "steps": 999841,
42
- "file_path": "results/Huggy/Huggy/Huggy-999841.onnx",
43
- "reward": 3.8870706538685034,
44
- "creation_time": 1675776109.7399635,
45
- "auxillary_file_paths": [
46
- "results/Huggy/Huggy/Huggy-999841.pt"
47
- ]
48
- },
49
- {
50
- "steps": 1199402,
51
- "file_path": "results/Huggy/Huggy/Huggy-1199402.onnx",
52
- "reward": 3.251411199569702,
53
- "creation_time": 1675776355.9214182,
54
- "auxillary_file_paths": [
55
- "results/Huggy/Huggy/Huggy-1199402.pt"
56
- ]
57
- },
58
- {
59
- "steps": 1399908,
60
- "file_path": "results/Huggy/Huggy/Huggy-1399908.onnx",
61
- "reward": 4.009560413658619,
62
- "creation_time": 1675776604.0595386,
63
- "auxillary_file_paths": [
64
- "results/Huggy/Huggy/Huggy-1399908.pt"
65
- ]
66
- },
67
- {
68
- "steps": 1599898,
69
- "file_path": "results/Huggy/Huggy/Huggy-1599898.onnx",
70
- "reward": 3.570658856845764,
71
- "creation_time": 1675776848.6336129,
72
- "auxillary_file_paths": [
73
- "results/Huggy/Huggy/Huggy-1599898.pt"
74
- ]
75
- },
76
- {
77
- "steps": 1799958,
78
- "file_path": "results/Huggy/Huggy/Huggy-1799958.onnx",
79
- "reward": 3.881925370433543,
80
- "creation_time": 1675777095.2964113,
81
- "auxillary_file_paths": [
82
- "results/Huggy/Huggy/Huggy-1799958.pt"
83
- ]
84
- },
85
- {
86
- "steps": 1999905,
87
- "file_path": "results/Huggy/Huggy/Huggy-1999905.onnx",
88
- "reward": 3.977051999833849,
89
- "creation_time": 1675777340.8340354,
90
- "auxillary_file_paths": [
91
- "results/Huggy/Huggy/Huggy-1999905.pt"
92
- ]
93
- },
94
- {
95
- "steps": 2000030,
96
- "file_path": "results/Huggy/Huggy/Huggy-2000030.onnx",
97
- "reward": 4.080118775367737,
98
- "creation_time": 1675777340.964843,
99
- "auxillary_file_paths": [
100
- "results/Huggy/Huggy/Huggy-2000030.pt"
101
- ]
102
- }
103
- ],
104
- "final_checkpoint": {
105
- "steps": 2000030,
106
- "file_path": "results/Huggy/Huggy.onnx",
107
- "reward": 4.080118775367737,
108
- "creation_time": 1675777340.964843,
109
- "auxillary_file_paths": [
110
- "results/Huggy/Huggy/Huggy-2000030.pt"
111
- ]
112
- }
113
- },
114
  "metadata": {
115
  "stats_format_version": "0.3.0",
116
- "mlagents_version": "0.29.0.dev0",
117
- "torch_version": "1.8.1+cu102"
118
  }
119
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "metadata": {
3
  "stats_format_version": "0.3.0",
4
+ "mlagents_version": "1.2.0.dev0",
5
+ "torch_version": "2.8.0+cu128"
6
  }
7
  }