partzel committed on
Commit
25ea751
·
verified ·
1 Parent(s): bf56587

imp: use seed 451

Browse files
AimAndThrow.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7101dd073c902311a675f61994cc62f5d899521265b256d91b7e86c0c77317c
3
  size 79097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5036d175da74a2ec1ef28e80b453a1f6d842675af3814a76f955d7928718837d
3
  size 79097
AimAndThrow/AimAndThrow-1499763.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792b64e7f6998caacc3572444bd86ff44233a93b5df34ddeb43c58fe4f41e29c
3
+ size 79097
AimAndThrow/AimAndThrow-1499763.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16cd5d27aec79aee01af7ac64423c619e3f8ee8650b9b120cce72778d14123d6
3
+ size 454608
AimAndThrow/AimAndThrow-1999828.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92820ba8ad80c85d3b642db78f54a2e86692904aaac248cb76e7962d21e5867f
3
+ size 79097
AimAndThrow/AimAndThrow-1999828.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca72850e398ce8617889b0e03b82b8b9888f173afdd78186515f1099b92474c7
3
+ size 454608
AimAndThrow/AimAndThrow-2172.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4280c10837be0a2d3629dc50f318f86af42e8251af776686a868c324f33cc0d2
3
+ size 78644
AimAndThrow/AimAndThrow-2172.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49567fd2b5c8e038ef9bbee68c00f2e06ddd689aa9d7af4365ad768601ac0276
3
+ size 156335
AimAndThrow/AimAndThrow-2499893.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d511e17c2fd60047b8858505f92084d4389d208ff86658d5a3a4e59e4767e97
3
+ size 79097
AimAndThrow/AimAndThrow-2499893.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0e5018133a8c0e68901893cc4b525d8d826d8c491f948ae4335128a480c2592
3
+ size 454608
AimAndThrow/AimAndThrow-2999898.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94de3d0f1e4a03cf893bfd365cade1ed33aed1a021939fc93d8baa2284631814
3
+ size 79097
AimAndThrow/AimAndThrow-2999898.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4181926ed992ebd051c56430c66bed3ced71ab4a42a38362202119f4d32dd81c
3
+ size 454608
AimAndThrow/AimAndThrow-3499955.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760413c5a952230ddca36ee2a66b295422a999db38d67f0b2e76b8c371f17c84
3
+ size 79097
AimAndThrow/AimAndThrow-3499955.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a1216c2079378efc08724d4d29b2175be3ad2fb1bab89e04cf3b540010b97b
3
+ size 454608
AimAndThrow/AimAndThrow-3999903.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1aad3f10c313f03f59095b3a5db5b27edf7be1bd1621fcc69d5e86564876ee
3
+ size 79097
AimAndThrow/AimAndThrow-3999903.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00554f0b2348b8f7577fef5ddf5f1a9867ce079dd6fe00e7357cd5b207d644b6
3
+ size 454608
AimAndThrow/AimAndThrow-4499878.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ee9f8aa1e471dbfba5f8f686dec19416a331947d5bf280a0207d3b20ff27814
3
+ size 79097
AimAndThrow/AimAndThrow-4499878.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a77cc438bdc7021db82608f154c7c69eef56aac2f22d23f707045656d1c60e
3
+ size 454608
AimAndThrow/AimAndThrow-4999944.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f4cfaf5dd9806a0fcc8694bfd1f59de7f7d5a382dec354a2599a9d806987df
3
+ size 79097
AimAndThrow/AimAndThrow-4999944.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f23f166ae577d39a7a82dd2d36dfaa4865a6ab8eb50f0a3059f80e99b07e2c1
3
+ size 454608
AimAndThrow/AimAndThrow-5000085.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5036d175da74a2ec1ef28e80b453a1f6d842675af3814a76f955d7928718837d
3
+ size 79097
AimAndThrow/AimAndThrow-5000085.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18efeffc059716ba634c99d348556ed618250dd49452b76f4a4b4d5d2cd59f3b
3
+ size 454608
AimAndThrow/AimAndThrow-999996.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:191bbd1727693c55dd04f87fa9fdaf91f4ab48a7ff71fb0bd6830339f5718a1a
3
+ size 79097
AimAndThrow/AimAndThrow-999996.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65eb5c5ee010495213d0d78e35264eb27e86b2d7b245c548bd1f5fa6a761bd22
3
+ size 454541
AimAndThrow/checkpoint.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c33b64d2547b0b2121662f04dcc14d5f5e08e58bdfd3ae753f10c72cf47d373
3
  size 454005
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06bf9bcabe449179b9dab14719b0461bc53ea2fb4552c9f09851bef2528ed565
3
  size 454005
AimAndThrow/events.out.tfevents.1757063477.MSI-Thin-GF75.13936.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f2adadb5ac5ae2070b780f609a6729a1d029996e7cf459138a1095de2550e5
3
+ size 254365
README.md CHANGED
@@ -1,137 +1,35 @@
1
- ---
2
- library_name: ml-agents
3
- tags:
4
- - AimAndThrow
5
- - deep-reinforcement-learning
6
- - reinforcement-learning
7
- - ML-Agents-AimAndThrow
8
- license: mit
9
- language:
10
- - en
11
- ---
12
-
13
- ---
14
- You can get the unity environment from [GitHub](https://github.com/partzel/ToucanHush).
15
-
16
- ---
17
-
18
- # Model Card: PPO Agent on 12x12-GrassWorld Deterministic (HushToucans Environment)
19
-
20
- ## Model Details
21
-
22
- * **Model Type:** Proximal Policy Optimization (PPO)
23
- * **Framework:** [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3)
24
- * **Environment:** Custom Unity ML-Agents environment (*HushToucans-12x12-GrassWorld Deterministic*)
25
- * **Author:** Ahmed El Mahdi BENDOU
26
- * **License:** MIT
27
- * **Status:** Prototype (first-stage training, baseline policy)
28
-
29
- This model is the first trained policy in a planned **curriculum learning pipeline**. It demonstrates the agent’s ability to learn navigation and simple reward dynamics for throw actions.
30
-
31
- ---
32
-
33
- ## Intended Use
34
-
35
- * **Baseline reference** for future curriculum learning setups.
36
- * **Educational demonstration** of Unity ML-Agents + PPO training.
37
-
38
- Not intended for production or safety-critical applications.
39
-
40
- ---
41
-
42
- ## Environment Specification
43
-
44
- **Name:** 12x12-GrassWorld Deterministic
45
- **Grid size:** 12 × 12
46
-
47
- **Agent Actions:**
48
-
49
- * Move Forward
50
- * Move Backward
51
- * Turn Left / Turn Right
52
- * Throw Banana 🍌
53
-
54
- **Rewards:**
55
-
56
- * **+1** for reaching/scoring a stationed toucan
57
- * **-1** for bumping into walls
58
- * **-0.01** penalty per step (encourages efficiency)
59
-
60
- **Special Mechanic:**
61
-
62
- * Agent can *throw* at 27° to hit a distant toucan (as illustrated).
63
-
64
- ![Environment Specification](./12x12GrassWorld-Deterministic-specification.png)
65
-
66
- ---
67
-
68
- ## Training Details
69
- ```
70
- Trainer: PPO
71
- Max steps: 5,000,000
72
- Checkpoint frequency: every 200,000 steps
73
-
74
- Hyperparameters
75
- Batch size: 512
76
- Buffer size: 51,200
77
- Learning rate: 0.0001 (linear decay)
78
- β (entropy regularization): 0.001
79
- ε (PPO clip range): 0.2
80
- λ (GAE): 0.99
81
- Epochs per update: 3
82
- Time horizon: 1000
83
-
84
- Network Settings
85
- Hidden units: 128
86
- Layers: 2 fully connected
87
- Normalization: Enabled
88
- Reward Signals
89
- Extrinsic:
90
- γ = 0.99
91
- Strength = 1.0
92
- ```
93
-
94
- The policy has achieved **basic competency**: moving, avoiding walls, and occasionally scoring via the throw mechanic.
95
-
96
- ---
97
-
98
- ## Evaluation
99
-
100
- * **Observed Behavior:** The agent successfully navigates to targets but still exhibits inefficient wandering. Throwing is used inconsistently.
101
- * **Limitations:**
102
-
103
- * Overfitting to deterministic transitions.
104
- * Suboptimal exploration.
105
- * No stochasticity introduced yet (to be addressed in future curriculum).
106
-
107
- Future experiments will evaluate robustness in stochastic or adversarial variations of GrassWorld.
108
-
109
- ---
110
-
111
- ## Future Work
112
-
113
- This model is the **first step** in a broader **curriculum learning experiment**, which will involve:
114
-
115
- 1. Scaling from deterministic → stochastic environments.
116
- 2. Introducing dynamic rewards and multiple agents.
117
- 3. Logging and reproducibility reports hosted on GitHub.
118
-
119
- ---
120
-
121
- ## Citation
122
-
123
- If you use this model, please cite:
124
-
125
- ```bibtex
126
- @misc{yourname2025grassworldppo,
127
- author = {Ahmed El Mahdi BENDOU},
128
- title = {PPO Agent trained on ToucanHush 12x12-GrassWorld Deterministic (Unity ML-Agents)},
129
- year = {2025},
130
- howpublished = {\url{https://huggingface.co/partzel/ToucanHush-12x12GrassWorldDeterministic}},
131
- }
132
- ```
133
-
134
- ## Assets Pack
135
- All assets have been custom made for this environment and you can get them for free from [here](https://partzel.itch.io/hushtoucanassetpack)
136
-
137
- ---
 
1
+ ---
2
+ library_name: ml-agents
3
+ tags:
4
+ - AimAndThrow
5
+ - deep-reinforcement-learning
6
+ - reinforcement-learning
7
+ - ML-Agents-AimAndThrow
8
+ ---
9
+
10
+ # **ppo** Agent playing **AimAndThrow**
11
+ This is a trained model of a **ppo** agent playing **AimAndThrow**
12
+ using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
13
+
14
+ ## Usage (with ML-Agents)
15
+ The Documentation: https://unity-technologies.github.io/ml-agents/ML-Agents-Toolkit-Documentation/
16
+
17
+ We wrote a complete tutorial to learn to train your first agent using ML-Agents and publish it to the Hub:
18
+ - A *short tutorial* where you teach Huggy the Dog 🐶 to fetch the stick and then play with him directly in your
19
+ browser: https://huggingface.co/learn/deep-rl-course/unitbonus1/introduction
20
+ - A *longer tutorial* to understand how ML-Agents works:
21
+ https://huggingface.co/learn/deep-rl-course/unit5/introduction
22
+
23
+ ### Resume the training
24
+ ```bash
25
+ mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
26
+ ```
27
+
28
+ ### Watch your Agent play
29
+ You can watch your agent **playing directly in your browser**
30
+
31
+ 1. If the environment is part of ML-Agents official environments, go to https://huggingface.co/unity
32
+ 2. Find your model_id: partzel/ToucanHush-12x12GrassWorldDeterministic
33
+ 3. Select your *.nn / *.onnx file
34
+ 4. Click on Watch the agent play 👀
35
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1 +1 @@
1
- {"default_settings": null, "behaviors": {"AimAndThrow": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 512, "buffer_size": 51200, "learning_rate": 0.0001, "beta": 0.001, "epsilon": 0.2, "lambd": 0.99, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 500000, "network_settings": {"normalize": true, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 10, "even_checkpoints": false, "max_steps": 5000000, "time_horizon": 1000, "summary_freq": 200000, "threaded": false, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "ToucanHush-lr1e-4-bs512", "initialize_from": null, "load_model": false, "resume": true, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
 
1
+ {"default_settings": null, "behaviors": {"AimAndThrow": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 512, "buffer_size": 51200, "learning_rate": 0.0001, "beta": 0.001, "epsilon": 0.2, "lambd": 0.99, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 500000, "network_settings": {"normalize": true, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 10, "even_checkpoints": false, "max_steps": 5000000, "time_horizon": 1000, "summary_freq": 200000, "threaded": false, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": 451, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "ToucanHush-lr1e-4-bs512-seed451", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}
configuration.yaml CHANGED
@@ -51,7 +51,7 @@ env_settings:
51
  num_envs: 1
52
  num_areas: 1
53
  timeout_wait: 60
54
- seed: -1
55
  max_lifetime_restarts: 10
56
  restarts_rate_limit_n: 1
57
  restarts_rate_limit_period_s: 60
@@ -66,11 +66,11 @@ engine_settings:
66
  no_graphics_monitor: false
67
  environment_parameters: null
68
  checkpoint_settings:
69
- run_id: ToucanHush-lr1e-4-bs512
70
  initialize_from: null
71
  load_model: false
72
- resume: true
73
- force: false
74
  train_model: false
75
  inference: false
76
  results_dir: results
 
51
  num_envs: 1
52
  num_areas: 1
53
  timeout_wait: 60
54
+ seed: 451
55
  max_lifetime_restarts: 10
56
  restarts_rate_limit_n: 1
57
  restarts_rate_limit_period_s: 60
 
66
  no_graphics_monitor: false
67
  environment_parameters: null
68
  checkpoint_settings:
69
+ run_id: ToucanHush-lr1e-4-bs512-seed451
70
  initialize_from: null
71
  load_model: false
72
+ resume: false
73
+ force: true
74
  train_model: false
75
  inference: false
76
  results_dir: results
run_logs/timers.json CHANGED
@@ -2,257 +2,257 @@
2
  "name": "root",
3
  "gauges": {
4
  "AimAndThrow.Policy.Entropy.mean": {
5
- "value": 1.3892475366592407,
6
- "min": 1.3832285404205322,
7
- "max": 1.4470560550689697,
8
- "count": 10
9
  },
10
  "AimAndThrow.Policy.Entropy.sum": {
11
- "value": 278138.46875,
12
- "min": 276291.59375,
13
- "max": 292536.84375,
14
- "count": 10
15
  },
16
  "AimAndThrow.Environment.EpisodeLength.mean": {
17
- "value": 170.75536480686696,
18
- "min": 168.5284139100933,
19
- "max": 180.49364791288565,
20
- "count": 10
21
  },
22
  "AimAndThrow.Environment.EpisodeLength.sum": {
23
- "value": 198930.0,
24
- "min": 198649.0,
25
- "max": 199080.0,
26
- "count": 10
27
  },
28
  "AimAndThrow.Step.mean": {
29
- "value": 4999907.0,
30
- "min": 3199911.0,
31
- "max": 4999907.0,
32
- "count": 10
33
  },
34
  "AimAndThrow.Step.sum": {
35
- "value": 4999907.0,
36
- "min": 3199911.0,
37
- "max": 4999907.0,
38
- "count": 10
39
  },
40
  "AimAndThrow.Policy.ExtrinsicValueEstimate.mean": {
41
- "value": -0.09314975887537003,
42
- "min": -0.12198472023010254,
43
- "max": -0.08240056782960892,
44
- "count": 10
45
  },
46
  "AimAndThrow.Policy.ExtrinsicValueEstimate.sum": {
47
- "value": -108.51947021484375,
48
- "min": -134.30517578125,
49
- "max": -97.1502685546875,
50
- "count": 10
51
  },
52
  "AimAndThrow.Environment.CumulativeReward.mean": {
53
- "value": -0.2722077760061993,
54
- "min": -0.3344314770737526,
55
- "max": -0.2292960595538598,
56
- "count": 10
57
  },
58
  "AimAndThrow.Environment.CumulativeReward.sum": {
59
- "value": -317.12205904722214,
60
- "min": -368.2090562582016,
61
- "max": -270.3400542140007,
62
- "count": 10
63
  },
64
  "AimAndThrow.Policy.ExtrinsicReward.mean": {
65
- "value": -0.2722077760061993,
66
- "min": -0.3344314770737526,
67
- "max": -0.2292960595538598,
68
- "count": 10
69
  },
70
  "AimAndThrow.Policy.ExtrinsicReward.sum": {
71
- "value": -317.12205904722214,
72
- "min": -368.2090562582016,
73
- "max": -270.3400542140007,
74
- "count": 10
75
  },
76
  "AimAndThrow.Losses.PolicyLoss.mean": {
77
- "value": 0.03534654652078947,
78
- "min": 0.034045236336338956,
79
- "max": 0.036389124503152244,
80
- "count": 10
81
  },
82
  "AimAndThrow.Losses.PolicyLoss.sum": {
83
- "value": 0.10603963956236839,
84
- "min": 0.10603963956236839,
85
- "max": 0.14457743012268714,
86
- "count": 10
87
  },
88
  "AimAndThrow.Losses.ValueLoss.mean": {
89
- "value": 0.04565222833719518,
90
- "min": 0.042183469543233515,
91
- "max": 0.04730776108801364,
92
- "count": 10
93
  },
94
  "AimAndThrow.Losses.ValueLoss.sum": {
95
- "value": 0.13695668501158553,
96
- "min": 0.12860161892448863,
97
- "max": 0.18923104435205457,
98
- "count": 10
99
  },
100
  "AimAndThrow.Policy.LearningRate.mean": {
101
- "value": 2.0405779595199996e-06,
102
- "min": 2.0405779595199996e-06,
103
- "max": 3.79480287187e-05,
104
- "count": 10
105
  },
106
  "AimAndThrow.Policy.LearningRate.sum": {
107
- "value": 6.121733878559999e-06,
108
- "min": 6.121733878559999e-06,
109
- "max": 0.00013741992258034,
110
- "count": 10
111
  },
112
  "AimAndThrow.Policy.Epsilon.mean": {
113
- "value": 0.10204048000000003,
114
- "min": 0.10204048000000003,
115
- "max": 0.13794796666666667,
116
- "count": 10
117
  },
118
  "AimAndThrow.Policy.Epsilon.sum": {
119
- "value": 0.3061214400000001,
120
- "min": 0.3061214400000001,
121
- "max": 0.5374196600000001,
122
- "count": 10
123
  },
124
  "AimAndThrow.Policy.Beta.mean": {
125
- "value": 3.0200752000000003e-05,
126
- "min": 3.0200752000000003e-05,
127
- "max": 0.00038568487000000006,
128
- "count": 10
129
  },
130
  "AimAndThrow.Policy.Beta.sum": {
131
- "value": 9.0602256e-05,
132
- "min": 9.0602256e-05,
133
- "max": 0.001400454634,
134
- "count": 10
135
  },
136
  "AimAndThrow.IsTraining.mean": {
137
  "value": 1.0,
138
  "min": 1.0,
139
  "max": 1.0,
140
- "count": 10
141
  },
142
  "AimAndThrow.IsTraining.sum": {
143
  "value": 1.0,
144
  "min": 1.0,
145
  "max": 1.0,
146
- "count": 10
147
  }
148
  },
149
  "metadata": {
150
  "timer_format_version": "0.1.0",
151
- "start_time_seconds": "1757016029",
152
  "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr 5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]",
153
- "command_line_arguments": "D:\\ml-agents\\venv\\Scripts\\mlagents-learn ../../config/AimAndThrow.yaml --resume --run-id=ToucanHush-lr1e-4-bs512",
154
  "mlagents_version": "1.2.0.dev0",
155
  "mlagents_envs_version": "1.2.0.dev0",
156
  "communication_protocol_version": "1.5.0",
157
  "pytorch_version": "2.8.0+cu129",
158
  "numpy_version": "1.23.5",
159
- "end_time_seconds": "1757018343"
160
  },
161
- "total": 2314.082258899987,
162
  "count": 1,
163
- "self": 0.006703999970341101,
164
  "children": {
165
  "run_training.setup": {
166
- "total": 0.1290828000055626,
167
  "count": 1,
168
- "self": 0.1290828000055626
169
  },
170
  "TrainerController.start_learning": {
171
- "total": 2313.9464721000113,
172
  "count": 1,
173
- "self": 2.3979349007859128,
174
  "children": {
175
  "TrainerController._reset_env": {
176
- "total": 18.20734400000947,
177
  "count": 1,
178
- "self": 18.20734400000947
179
  },
180
  "TrainerController.advance": {
181
- "total": 2293.2781405992137,
182
- "count": 133585,
183
- "self": 2.3464309999835677,
184
  "children": {
185
  "env_step": {
186
- "total": 1801.8451803984935,
187
- "count": 133585,
188
- "self": 1245.4353575954156,
189
  "children": {
190
  "SubprocessEnvManager._take_step": {
191
- "total": 554.7722125035652,
192
- "count": 133585,
193
- "self": 7.100844602406141,
194
  "children": {
195
  "TorchPolicy.evaluate": {
196
- "total": 547.6713679011591,
197
- "count": 125119,
198
- "self": 547.6713679011591
199
  }
200
  }
201
  },
202
  "workers": {
203
- "total": 1.637610299512744,
204
- "count": 133585,
205
  "self": 0.0,
206
  "children": {
207
  "worker_root": {
208
- "total": 2293.928678000215,
209
- "count": 133585,
210
  "is_parallel": true,
211
- "self": 1206.7310486966308,
212
  "children": {
213
  "steps_from_proto": {
214
- "total": 0.0004230999911669642,
215
  "count": 1,
216
  "is_parallel": true,
217
- "self": 0.00023099998361431062,
218
  "children": {
219
  "_process_rank_one_or_two_observation": {
220
- "total": 0.00019210000755265355,
221
  "count": 2,
222
  "is_parallel": true,
223
- "self": 0.00019210000755265355
224
  }
225
  }
226
  },
227
  "UnityEnvironment.step": {
228
- "total": 1087.197206203593,
229
- "count": 133585,
230
  "is_parallel": true,
231
- "self": 14.519432301283814,
232
  "children": {
233
  "UnityEnvironment._generate_step_input": {
234
- "total": 23.934972499875585,
235
- "count": 133585,
236
  "is_parallel": true,
237
- "self": 23.934972499875585
238
  },
239
  "communicator.exchange": {
240
- "total": 1011.1638868003356,
241
- "count": 133585,
242
  "is_parallel": true,
243
- "self": 1011.1638868003356
244
  },
245
  "steps_from_proto": {
246
- "total": 37.57891460209794,
247
- "count": 133585,
248
  "is_parallel": true,
249
- "self": 19.125529006996658,
250
  "children": {
251
  "_process_rank_one_or_two_observation": {
252
- "total": 18.45338559510128,
253
- "count": 267170,
254
  "is_parallel": true,
255
- "self": 18.45338559510128
256
  }
257
  }
258
  }
@@ -265,31 +265,31 @@
265
  }
266
  },
267
  "trainer_advance": {
268
- "total": 489.08652920073655,
269
- "count": 133585,
270
- "self": 4.662728499126388,
271
  "children": {
272
  "process_trajectory": {
273
- "total": 115.56687550162314,
274
- "count": 133585,
275
- "self": 115.2692794016184,
276
  "children": {
277
  "RLTrainer._checkpoint": {
278
- "total": 0.29759610000473913,
279
- "count": 4,
280
- "self": 0.29759610000473913
281
  }
282
  }
283
  },
284
  "_update_policy": {
285
- "total": 368.856925199987,
286
- "count": 38,
287
- "self": 199.19484069995815,
288
  "children": {
289
  "TorchPPOOptimizer.update": {
290
- "total": 169.66208450002887,
291
- "count": 11400,
292
- "self": 169.66208450002887
293
  }
294
  }
295
  }
@@ -298,19 +298,19 @@
298
  }
299
  },
300
  "trainer_threads": {
301
- "total": 7.00005330145359e-07,
302
  "count": 1,
303
- "self": 7.00005330145359e-07
304
  },
305
  "TrainerController._save_models": {
306
- "total": 0.06305189999693539,
307
  "count": 1,
308
- "self": 0.007433899998432025,
309
  "children": {
310
  "RLTrainer._checkpoint": {
311
- "total": 0.055617999998503365,
312
  "count": 1,
313
- "self": 0.055617999998503365
314
  }
315
  }
316
  }
 
2
  "name": "root",
3
  "gauges": {
4
  "AimAndThrow.Policy.Entropy.mean": {
5
+ "value": 0.459451287984848,
6
+ "min": 0.459451287984848,
7
+ "max": 1.78365159034729,
8
+ "count": 25
9
  },
10
  "AimAndThrow.Policy.Entropy.sum": {
11
+ "value": 92037.28125,
12
+ "min": 92037.28125,
13
+ "max": 360326.15625,
14
+ "count": 25
15
  },
16
  "AimAndThrow.Environment.EpisodeLength.mean": {
17
+ "value": 85.02537634408603,
18
+ "min": 85.02537634408603,
19
+ "max": 220.64745011086475,
20
+ "count": 25
21
  },
22
  "AimAndThrow.Environment.EpisodeLength.sum": {
23
+ "value": 197684.0,
24
+ "min": 197684.0,
25
+ "max": 199158.0,
26
+ "count": 25
27
  },
28
  "AimAndThrow.Step.mean": {
29
+ "value": 4999944.0,
30
+ "min": 199987.0,
31
+ "max": 4999944.0,
32
+ "count": 25
33
  },
34
  "AimAndThrow.Step.sum": {
35
+ "value": 4999944.0,
36
+ "min": 199987.0,
37
+ "max": 4999944.0,
38
+ "count": 25
39
  },
40
  "AimAndThrow.Policy.ExtrinsicValueEstimate.mean": {
41
+ "value": 0.3502625524997711,
42
+ "min": -0.361602783203125,
43
+ "max": 0.3502625524997711,
44
+ "count": 25
45
  },
46
  "AimAndThrow.Policy.ExtrinsicValueEstimate.sum": {
47
+ "value": 814.0101928710938,
48
+ "min": -326.16571044921875,
49
+ "max": 814.0101928710938,
50
+ "count": 25
51
  },
52
  "AimAndThrow.Environment.CumulativeReward.mean": {
53
+ "value": 0.6396350792501757,
54
+ "min": -1.1251290707282346,
55
+ "max": 0.6396350792501757,
56
+ "count": 25
57
  },
58
  "AimAndThrow.Environment.CumulativeReward.sum": {
59
+ "value": 1486.5119241774082,
60
+ "min": -1499.797051280737,
61
+ "max": 1486.5119241774082,
62
+ "count": 25
63
  },
64
  "AimAndThrow.Policy.ExtrinsicReward.mean": {
65
+ "value": 0.6396350792501757,
66
+ "min": -1.1251290707282346,
67
+ "max": 0.6396350792501757,
68
+ "count": 25
69
  },
70
  "AimAndThrow.Policy.ExtrinsicReward.sum": {
71
+ "value": 1486.5119241774082,
72
+ "min": -1499.797051280737,
73
+ "max": 1486.5119241774082,
74
+ "count": 25
75
  },
76
  "AimAndThrow.Losses.PolicyLoss.mean": {
77
+ "value": 0.03416703626164235,
78
+ "min": 0.034137498232303186,
79
+ "max": 0.03677766110825663,
80
+ "count": 25
81
  },
82
  "AimAndThrow.Losses.PolicyLoss.sum": {
83
+ "value": 0.1366681450465694,
84
+ "min": 0.10485064039084441,
85
+ "max": 0.14711064443302652,
86
+ "count": 25
87
  },
88
  "AimAndThrow.Losses.ValueLoss.mean": {
89
+ "value": 0.04043484590481967,
90
+ "min": 0.020302016613228866,
91
+ "max": 0.05864293771175047,
92
+ "count": 25
93
  },
94
  "AimAndThrow.Losses.ValueLoss.sum": {
95
+ "value": 0.16173938361927867,
96
+ "min": 0.08120806645291546,
97
+ "max": 0.23457175084700188,
98
+ "count": 25
99
  },
100
  "AimAndThrow.Policy.LearningRate.mean": {
101
+ "value": 1.9945730055250013e-06,
102
+ "min": 1.9945730055250013e-06,
103
+ "max": 9.794401538932e-05,
104
+ "count": 25
105
  },
106
  "AimAndThrow.Policy.LearningRate.sum": {
107
+ "value": 7.978292022100005e-06,
108
+ "min": 7.978292022100005e-06,
109
+ "max": 0.0003773958226042,
110
+ "count": 25
111
  },
112
  "AimAndThrow.Policy.Epsilon.mean": {
113
+ "value": 0.10199447500000003,
114
+ "min": 0.10199447500000003,
115
+ "max": 0.19794401333333336,
116
+ "count": 25
117
  },
118
  "AimAndThrow.Policy.Epsilon.sum": {
119
+ "value": 0.4079779000000001,
120
+ "min": 0.36600964,
121
+ "max": 0.7773957999999999,
122
+ "count": 25
123
  },
124
  "AimAndThrow.Policy.Beta.mean": {
125
+ "value": 2.9745302500000005e-05,
126
+ "min": 2.9745302500000005e-05,
127
+ "max": 0.0009796457320000002,
128
+ "count": 25
129
  },
130
  "AimAndThrow.Policy.Beta.sum": {
131
+ "value": 0.00011898121000000002,
132
+ "min": 0.00011898121000000002,
133
+ "max": 0.0037762184200000005,
134
+ "count": 25
135
  },
136
  "AimAndThrow.IsTraining.mean": {
137
  "value": 1.0,
138
  "min": 1.0,
139
  "max": 1.0,
140
+ "count": 25
141
  },
142
  "AimAndThrow.IsTraining.sum": {
143
  "value": 1.0,
144
  "min": 1.0,
145
  "max": 1.0,
146
+ "count": 25
147
  }
148
  },
149
  "metadata": {
150
  "timer_format_version": "0.1.0",
151
+ "start_time_seconds": "1757063460",
152
  "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr 5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]",
153
+ "command_line_arguments": "D:\\ml-agents\\venv\\Scripts\\mlagents-learn ../../config/AimAndThrow.yaml --run-id=ToucanHush-lr1e-4-bs512-seed451 --seed=451 --force",
154
  "mlagents_version": "1.2.0.dev0",
155
  "mlagents_envs_version": "1.2.0.dev0",
156
  "communication_protocol_version": "1.5.0",
157
  "pytorch_version": "2.8.0+cu129",
158
  "numpy_version": "1.23.5",
159
+ "end_time_seconds": "1757070020"
160
  },
161
+ "total": 6560.5212958,
162
  "count": 1,
163
+ "self": 0.007863500000894419,
164
  "children": {
165
  "run_training.setup": {
166
+ "total": 0.08756299999998873,
167
  "count": 1,
168
+ "self": 0.08756299999998873
169
  },
170
  "TrainerController.start_learning": {
171
+ "total": 6560.425869299999,
172
  "count": 1,
173
+ "self": 7.095670400263771,
174
  "children": {
175
  "TrainerController._reset_env": {
176
+ "total": 20.534740399999976,
177
  "count": 1,
178
+ "self": 20.534740399999976
179
  },
180
  "TrainerController.advance": {
181
+ "total": 6532.7125493997355,
182
+ "count": 340583,
183
+ "self": 7.2223678995742375,
184
  "children": {
185
  "env_step": {
186
+ "total": 5119.653573500246,
187
+ "count": 340583,
188
+ "self": 3649.4608105005127,
189
  "children": {
190
  "SubprocessEnvManager._take_step": {
191
+ "total": 1465.4144640998343,
192
+ "count": 340583,
193
+ "self": 21.73783139998841,
194
  "children": {
195
  "TorchPolicy.evaluate": {
196
+ "total": 1443.6766326998459,
197
+ "count": 312572,
198
+ "self": 1443.6766326998459
199
  }
200
  }
201
  },
202
  "workers": {
203
+ "total": 4.778298899899255,
204
+ "count": 340583,
205
  "self": 0.0,
206
  "children": {
207
  "worker_root": {
208
+ "total": 6533.0349240996875,
209
+ "count": 340583,
210
  "is_parallel": true,
211
+ "self": 3347.4401770996496,
212
  "children": {
213
  "steps_from_proto": {
214
+ "total": 0.0006743000000142274,
215
  "count": 1,
216
  "is_parallel": true,
217
+ "self": 0.0003619000000298911,
218
  "children": {
219
  "_process_rank_one_or_two_observation": {
220
+ "total": 0.0003123999999843363,
221
  "count": 2,
222
  "is_parallel": true,
223
+ "self": 0.0003123999999843363
224
  }
225
  }
226
  },
227
  "UnityEnvironment.step": {
228
+ "total": 3185.594072700038,
229
+ "count": 340583,
230
  "is_parallel": true,
231
+ "self": 41.29519420048928,
232
  "children": {
233
  "UnityEnvironment._generate_step_input": {
234
+ "total": 68.51995659977729,
235
+ "count": 340583,
236
  "is_parallel": true,
237
+ "self": 68.51995659977729
238
  },
239
  "communicator.exchange": {
240
+ "total": 2971.9692622997804,
241
+ "count": 340583,
242
  "is_parallel": true,
243
+ "self": 2971.9692622997804
244
  },
245
  "steps_from_proto": {
246
+ "total": 103.80965959999088,
247
+ "count": 340583,
248
  "is_parallel": true,
249
+ "self": 53.13167199986731,
250
  "children": {
251
  "_process_rank_one_or_two_observation": {
252
+ "total": 50.67798760012357,
253
+ "count": 681166,
254
  "is_parallel": true,
255
+ "self": 50.67798760012357
256
  }
257
  }
258
  }
 
265
  }
266
  },
267
  "trainer_advance": {
268
+ "total": 1405.8366079999153,
269
+ "count": 340583,
270
+ "self": 12.847537699994746,
271
  "children": {
272
  "process_trajectory": {
273
+ "total": 370.5619708999176,
274
+ "count": 340583,
275
+ "self": 369.56727029991805,
276
  "children": {
277
  "RLTrainer._checkpoint": {
278
+ "total": 0.9947005999995326,
279
+ "count": 10,
280
+ "self": 0.9947005999995326
281
  }
282
  }
283
  },
284
  "_update_policy": {
285
+ "total": 1022.427099400003,
286
+ "count": 97,
287
+ "self": 550.2304691000079,
288
  "children": {
289
  "TorchPPOOptimizer.update": {
290
+ "total": 472.19663029999515,
291
+ "count": 29100,
292
+ "self": 472.19663029999515
293
  }
294
  }
295
  }
 
298
  }
299
  },
300
  "trainer_threads": {
301
+ "total": 6.000000212225132e-07,
302
  "count": 1,
303
+ "self": 6.000000212225132e-07
304
  },
305
  "TrainerController._save_models": {
306
+ "total": 0.08290849999957572,
307
  "count": 1,
308
+ "self": 0.01336419999915961,
309
  "children": {
310
  "RLTrainer._checkpoint": {
311
+ "total": 0.06954430000041611,
312
  "count": 1,
313
+ "self": 0.06954430000041611
314
  }
315
  }
316
  }
run_logs/training_status.json CHANGED
@@ -2,103 +2,103 @@
2
  "AimAndThrow": {
3
  "checkpoints": [
4
  {
5
- "steps": 1999842,
6
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-1999842.onnx",
7
- "reward": -0.32050378287016457,
8
- "creation_time": 1757014111.006714,
9
  "auxillary_file_paths": [
10
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-1999842.pt"
11
  ]
12
  },
13
  {
14
- "steps": 2499936,
15
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2499936.onnx",
16
- "reward": -0.25919144333264477,
17
- "creation_time": 1757014766.098421,
18
  "auxillary_file_paths": [
19
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2499936.pt"
20
  ]
21
  },
22
  {
23
- "steps": 2999763,
24
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2999763.onnx",
25
- "reward": -0.3543306884746398,
26
- "creation_time": 1757015392.6672244,
27
  "auxillary_file_paths": [
28
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2999763.pt"
29
  ]
30
  },
31
  {
32
- "steps": 3000013,
33
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.onnx",
34
- "reward": -0.35946404337883,
35
- "creation_time": 1757015392.7439406,
36
  "auxillary_file_paths": [
37
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.pt"
38
  ]
39
  },
40
  {
41
- "steps": 3000013,
42
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.onnx",
43
- "reward": null,
44
- "creation_time": 1757016009.2596812,
45
  "auxillary_file_paths": [
46
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.pt"
47
  ]
48
  },
49
  {
50
- "steps": 3499965,
51
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3499965.onnx",
52
- "reward": -0.2504000486298041,
53
- "creation_time": 1757016617.46741,
54
  "auxillary_file_paths": [
55
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3499965.pt"
56
  ]
57
  },
58
  {
59
- "steps": 3999938,
60
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3999938.onnx",
61
- "reward": -0.3533456439481062,
62
- "creation_time": 1757017200.9901583,
63
  "auxillary_file_paths": [
64
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3999938.pt"
65
  ]
66
  },
67
  {
68
- "steps": 4499955,
69
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4499955.onnx",
70
- "reward": -0.29398596328748783,
71
- "creation_time": 1757017777.2994728,
72
  "auxillary_file_paths": [
73
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4499955.pt"
74
  ]
75
  },
76
  {
77
- "steps": 4999907,
78
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4999907.onnx",
79
- "reward": -0.31742338289817174,
80
- "creation_time": 1757018343.732425,
81
  "auxillary_file_paths": [
82
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4999907.pt"
83
  ]
84
  },
85
  {
86
- "steps": 5000142,
87
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-5000142.onnx",
88
- "reward": -0.3161561958417544,
89
- "creation_time": 1757018343.7977736,
90
  "auxillary_file_paths": [
91
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-5000142.pt"
92
  ]
93
  }
94
  ],
95
  "final_checkpoint": {
96
- "steps": 5000142,
97
- "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow.onnx",
98
- "reward": -0.3161561958417544,
99
- "creation_time": 1757018343.7977736,
100
  "auxillary_file_paths": [
101
- "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-5000142.pt"
102
  ]
103
  }
104
  },
 
2
  "AimAndThrow": {
3
  "checkpoints": [
4
  {
5
+ "steps": 999996,
6
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-999996.onnx",
7
+ "reward": -0.5190732231954249,
8
+ "creation_time": 1757064777.69313,
9
  "auxillary_file_paths": [
10
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-999996.pt"
11
  ]
12
  },
13
  {
14
+ "steps": 1499763,
15
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1499763.onnx",
16
+ "reward": -0.5913965953835125,
17
+ "creation_time": 1757065426.7471402,
18
  "auxillary_file_paths": [
19
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1499763.pt"
20
  ]
21
  },
22
  {
23
+ "steps": 1999828,
24
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1999828.onnx",
25
+ "reward": -0.037763210601712525,
26
+ "creation_time": 1757066065.8641207,
27
  "auxillary_file_paths": [
28
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1999828.pt"
29
  ]
30
  },
31
  {
32
+ "steps": 2499893,
33
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2499893.onnx",
34
+ "reward": 0.3667246930898274,
35
+ "creation_time": 1757066714.01822,
36
  "auxillary_file_paths": [
37
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2499893.pt"
38
  ]
39
  },
40
  {
41
+ "steps": 2999898,
42
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2999898.onnx",
43
+ "reward": 0.4242128275408603,
44
+ "creation_time": 1757067357.4376752,
45
  "auxillary_file_paths": [
46
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2999898.pt"
47
  ]
48
  },
49
  {
50
+ "steps": 3499955,
51
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3499955.onnx",
52
+ "reward": 0.5942972659527719,
53
+ "creation_time": 1757068020.0002558,
54
  "auxillary_file_paths": [
55
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3499955.pt"
56
  ]
57
  },
58
  {
59
+ "steps": 3999903,
60
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3999903.onnx",
61
+ "reward": 0.6044068104187346,
62
+ "creation_time": 1757068694.08997,
63
  "auxillary_file_paths": [
64
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3999903.pt"
65
  ]
66
  },
67
  {
68
+ "steps": 4499878,
69
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4499878.onnx",
70
+ "reward": 0.6381313523091829,
71
+ "creation_time": 1757069358.1493115,
72
  "auxillary_file_paths": [
73
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4499878.pt"
74
  ]
75
  },
76
  {
77
+ "steps": 4999944,
78
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4999944.onnx",
79
+ "reward": 0.6078923956094036,
80
+ "creation_time": 1757070020.4738462,
81
  "auxillary_file_paths": [
82
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4999944.pt"
83
  ]
84
  },
85
  {
86
+ "steps": 5000085,
87
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-5000085.onnx",
88
+ "reward": 0.6087707160961016,
89
+ "creation_time": 1757070020.5539749,
90
  "auxillary_file_paths": [
91
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-5000085.pt"
92
  ]
93
  }
94
  ],
95
  "final_checkpoint": {
96
+ "steps": 5000085,
97
+ "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow.onnx",
98
+ "reward": 0.6087707160961016,
99
+ "creation_time": 1757070020.5539749,
100
  "auxillary_file_paths": [
101
+ "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-5000085.pt"
102
  ]
103
  }
104
  },