partzel committed on Sep 5, 2025

Commit

25ea751

verified ·

1 Parent(s): bf56587

imp: use seed 451

Browse files

Files changed (30) hide show

AimAndThrow.onnx +1 -1
AimAndThrow/AimAndThrow-1499763.onnx +3 -0
AimAndThrow/AimAndThrow-1499763.pt +3 -0
AimAndThrow/AimAndThrow-1999828.onnx +3 -0
AimAndThrow/AimAndThrow-1999828.pt +3 -0
AimAndThrow/AimAndThrow-2172.onnx +3 -0
AimAndThrow/AimAndThrow-2172.pt +3 -0
AimAndThrow/AimAndThrow-2499893.onnx +3 -0
AimAndThrow/AimAndThrow-2499893.pt +3 -0
AimAndThrow/AimAndThrow-2999898.onnx +3 -0
AimAndThrow/AimAndThrow-2999898.pt +3 -0
AimAndThrow/AimAndThrow-3499955.onnx +3 -0
AimAndThrow/AimAndThrow-3499955.pt +3 -0
AimAndThrow/AimAndThrow-3999903.onnx +3 -0
AimAndThrow/AimAndThrow-3999903.pt +3 -0
AimAndThrow/AimAndThrow-4499878.onnx +3 -0
AimAndThrow/AimAndThrow-4499878.pt +3 -0
AimAndThrow/AimAndThrow-4999944.onnx +3 -0
AimAndThrow/AimAndThrow-4999944.pt +3 -0
AimAndThrow/AimAndThrow-5000085.onnx +3 -0
AimAndThrow/AimAndThrow-5000085.pt +3 -0
AimAndThrow/AimAndThrow-999996.onnx +3 -0
AimAndThrow/AimAndThrow-999996.pt +3 -0
AimAndThrow/checkpoint.pt +1 -1
AimAndThrow/events.out.tfevents.1757063477.MSI-Thin-GF75.13936.0 +3 -0
README.md +35 -137
config.json +1 -1
configuration.yaml +4 -4
run_logs/timers.json +158 -158
run_logs/training_status.json +55 -55

AimAndThrow.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7101dd073c902311a675f61994cc62f5d899521265b256d91b7e86c0c77317c
 size 79097

 version https://git-lfs.github.com/spec/v1
+oid sha256:5036d175da74a2ec1ef28e80b453a1f6d842675af3814a76f955d7928718837d
 size 79097

AimAndThrow/AimAndThrow-1499763.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:792b64e7f6998caacc3572444bd86ff44233a93b5df34ddeb43c58fe4f41e29c
+size 79097

AimAndThrow/AimAndThrow-1499763.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16cd5d27aec79aee01af7ac64423c619e3f8ee8650b9b120cce72778d14123d6
+size 454608

AimAndThrow/AimAndThrow-1999828.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92820ba8ad80c85d3b642db78f54a2e86692904aaac248cb76e7962d21e5867f
+size 79097

AimAndThrow/AimAndThrow-1999828.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca72850e398ce8617889b0e03b82b8b9888f173afdd78186515f1099b92474c7
+size 454608

AimAndThrow/AimAndThrow-2172.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4280c10837be0a2d3629dc50f318f86af42e8251af776686a868c324f33cc0d2
+size 78644

AimAndThrow/AimAndThrow-2172.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49567fd2b5c8e038ef9bbee68c00f2e06ddd689aa9d7af4365ad768601ac0276
+size 156335

AimAndThrow/AimAndThrow-2499893.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d511e17c2fd60047b8858505f92084d4389d208ff86658d5a3a4e59e4767e97
+size 79097

AimAndThrow/AimAndThrow-2499893.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0e5018133a8c0e68901893cc4b525d8d826d8c491f948ae4335128a480c2592
+size 454608

AimAndThrow/AimAndThrow-2999898.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94de3d0f1e4a03cf893bfd365cade1ed33aed1a021939fc93d8baa2284631814
+size 79097

AimAndThrow/AimAndThrow-2999898.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4181926ed992ebd051c56430c66bed3ced71ab4a42a38362202119f4d32dd81c
+size 454608

AimAndThrow/AimAndThrow-3499955.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:760413c5a952230ddca36ee2a66b295422a999db38d67f0b2e76b8c371f17c84
+size 79097

AimAndThrow/AimAndThrow-3499955.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73a1216c2079378efc08724d4d29b2175be3ad2fb1bab89e04cf3b540010b97b
+size 454608

AimAndThrow/AimAndThrow-3999903.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae1aad3f10c313f03f59095b3a5db5b27edf7be1bd1621fcc69d5e86564876ee
+size 79097

AimAndThrow/AimAndThrow-3999903.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00554f0b2348b8f7577fef5ddf5f1a9867ce079dd6fe00e7357cd5b207d644b6
+size 454608

AimAndThrow/AimAndThrow-4499878.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ee9f8aa1e471dbfba5f8f686dec19416a331947d5bf280a0207d3b20ff27814
+size 79097

AimAndThrow/AimAndThrow-4499878.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a77cc438bdc7021db82608f154c7c69eef56aac2f22d23f707045656d1c60e
+size 454608

AimAndThrow/AimAndThrow-4999944.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:93f4cfaf5dd9806a0fcc8694bfd1f59de7f7d5a382dec354a2599a9d806987df
+size 79097

AimAndThrow/AimAndThrow-4999944.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f23f166ae577d39a7a82dd2d36dfaa4865a6ab8eb50f0a3059f80e99b07e2c1
+size 454608

AimAndThrow/AimAndThrow-5000085.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5036d175da74a2ec1ef28e80b453a1f6d842675af3814a76f955d7928718837d
+size 79097

AimAndThrow/AimAndThrow-5000085.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18efeffc059716ba634c99d348556ed618250dd49452b76f4a4b4d5d2cd59f3b
+size 454608

AimAndThrow/AimAndThrow-999996.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:191bbd1727693c55dd04f87fa9fdaf91f4ab48a7ff71fb0bd6830339f5718a1a
+size 79097

AimAndThrow/AimAndThrow-999996.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65eb5c5ee010495213d0d78e35264eb27e86b2d7b245c548bd1f5fa6a761bd22
+size 454541

AimAndThrow/checkpoint.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c33b64d2547b0b2121662f04dcc14d5f5e08e58bdfd3ae753f10c72cf47d373
 size 454005

 version https://git-lfs.github.com/spec/v1
+oid sha256:06bf9bcabe449179b9dab14719b0461bc53ea2fb4552c9f09851bef2528ed565
 size 454005

AimAndThrow/events.out.tfevents.1757063477.MSI-Thin-GF75.13936.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81f2adadb5ac5ae2070b780f609a6729a1d029996e7cf459138a1095de2550e5
+size 254365

README.md CHANGED Viewed

@@ -1,137 +1,35 @@
----
-library_name: ml-agents
-tags:
-- AimAndThrow
-- deep-reinforcement-learning
-- reinforcement-learning
-- ML-Agents-AimAndThrow
-license: mit
-language:
-- en
----
----
-You can get the unity environment from [GitHub](https://github.com/partzel/ToucanHush).
----
-# Model Card: PPO Agent on 12x12-GrassWorld Deterministic (HushToucans Environment)
-## Model Details
-* **Model Type:** Proximal Policy Optimization (PPO)
-* **Framework:** [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3)
-* **Environment:** Custom Unity ML-Agents environment (*HushToucans-12x12-GrassWorld Deterministic*)
-* **Author:** Ahmed El Mahdi BENDOU
-* **License:** MIT
-* **Status:** Prototype (first-stage training, baseline policy)
-This model is the first trained policy in a planned **curriculum learning pipeline**. It demonstrates the agent’s ability to learn navigation and simple reward dynamics for throw actions.
----
-## Intended Use
-* **Baseline reference** for future curriculum learning setups.
-* **Educational demonstration** of Unity ML-Agents + PPO training.
-Not intended for production or safety-critical applications.
----
-## Environment Specification
-**Name:** 12x12-GrassWorld Deterministic
-**Grid size:** 12 × 12
-**Agent Actions:**
-* Move Forward
-* Move Backward
-* Turn Left / Turn Right
-* Throw Banana 🍌
-**Rewards:**
-* **+1** for reaching/scoring a stationed toucan
-* **-1** for bumping into walls
-* **-0.01** penalty per step (encourages efficiency)
-**Special Mechanic:**
-* Agent can *throw* at 27° to hit a distant toucan (as illustrated).
-![Environment Specification](./12x12GrassWorld-Deterministic-specification.png)
----
-## Training Details
-```
-Trainer: PPO
-Max steps: 5,000,000
-Checkpoint frequency: every 200,000 steps
-Hyperparameters
-  Batch size: 512
-  Buffer size: 51,200
-  Learning rate: 0.0001 (linear decay)
-  β (entropy regularization): 0.001
-  ε (PPO clip range): 0.2
-  λ (GAE): 0.99
-  Epochs per update: 3
-  Time horizon: 1000
-  Network Settings
-    Hidden units: 128
-    Layers: 2 fully connected
-    Normalization: Enabled
-  Reward Signals
-    Extrinsic:
-      γ = 0.99
-      Strength = 1.0
-```
-The policy has achieved **basic competency**: moving, avoiding walls, and occasionally scoring via the throw mechanic.
----
-## Evaluation
-* **Observed Behavior:** The agent successfully navigates to targets but still exhibits inefficient wandering. Throwing is used inconsistently.
-* **Limitations:**
-  * Overfitting to deterministic transitions.
-  * Suboptimal exploration.
-  * No stochasticity introduced yet (to be addressed in future curriculum).
-Future experiments will evaluate robustness in stochastic or adversarial variations of GrassWorld.
----
-## Future Work
-This model is the **first step** in a broader **curriculum learning experiment**, which will involve:
-1. Scaling from deterministic → stochastic environments.
-2. Introducing dynamic rewards and multiple agents.
-3. Logging and reproducibility reports hosted on GitHub.
----
-## Citation
-If you use this model, please cite:
-```bibtex
-@misc{yourname2025grassworldppo,
-  author       = {Ahmed El Mahdi BENDOU},
-  title        = {PPO Agent trained on ToucanHush 12x12-GrassWorld Deterministic (Unity ML-Agents)},
-  year         = {2025},
-  howpublished = {\url{https://huggingface.co/partzel/ToucanHush-12x12GrassWorldDeterministic}},
-}
-```
-## Assets Pack
-All assets have been custom made for this environment and you can get them for free from [here](https://partzel.itch.io/hushtoucanassetpack)
----

+---
+library_name: ml-agents
+tags:
+- AimAndThrow
+- deep-reinforcement-learning
+- reinforcement-learning
+- ML-Agents-AimAndThrow
+---
+  # **ppo** Agent playing **AimAndThrow**
+  This is a trained model of a **ppo** agent playing **AimAndThrow**
+  using the [Unity ML-Agents Library](https://github.com/Unity-Technologies/ml-agents).
+  ## Usage (with ML-Agents)
+  The Documentation: https://unity-technologies.github.io/ml-agents/ML-Agents-Toolkit-Documentation/
+  We wrote a complete tutorial to learn to train your first agent using ML-Agents and publish it to the Hub:
+  - A *short tutorial* where you teach Huggy the Dog 🐶 to fetch the stick and then play with him directly in your
+  browser: https://huggingface.co/learn/deep-rl-course/unitbonus1/introduction
+  - A *longer tutorial* to understand how ML-Agents works:
+  https://huggingface.co/learn/deep-rl-course/unit5/introduction
+  ### Resume the training
+  ```bash
+  mlagents-learn <your_configuration_file_path.yaml> --run-id=<run_id> --resume
+  ```
+  ### Watch your Agent play
+  You can watch your agent **playing directly in your browser**
+  1. If the environment is part of ML-Agents official environments, go to https://huggingface.co/unity
+  2. Step 1: Find your model_id: partzel/ToucanHush-12x12GrassWorldDeterministic
+  3. Step 2: Select your *.nn /*.onnx file
+  4. Click on Watch the agent play 👀

config.json CHANGED Viewed

@@ -1 +1 @@

- {"default_settings": null, "behaviors": {"AimAndThrow": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 512, "buffer_size": 51200, "learning_rate": 0.0001, "beta": 0.001, "epsilon": 0.2, "lambd": 0.99, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 500000, "network_settings": {"normalize": true, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 10, "even_checkpoints": false, "max_steps": 5000000, "time_horizon": 1000, "summary_freq": 200000, "threaded": false, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": -1, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "ToucanHush-lr1e-4-bs512", "initialize_from": null, "load_model": false, "resume": true, "force": false, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}

+ {"default_settings": null, "behaviors": {"AimAndThrow": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 512, "buffer_size": 51200, "learning_rate": 0.0001, "beta": 0.001, "epsilon": 0.2, "lambd": 0.99, "num_epoch": 3, "shared_critic": false, "learning_rate_schedule": "linear", "beta_schedule": "linear", "epsilon_schedule": "linear"}, "checkpoint_interval": 500000, "network_settings": {"normalize": true, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0, "network_settings": {"normalize": false, "hidden_units": 128, "num_layers": 2, "vis_encode_type": "simple", "memory": null, "goal_conditioning_type": "hyper", "deterministic": false}}}, "init_path": null, "keep_checkpoints": 10, "even_checkpoints": false, "max_steps": 5000000, "time_horizon": 1000, "summary_freq": 200000, "threaded": false, "self_play": null, "behavioral_cloning": null}}, "env_settings": {"env_path": null, "env_args": null, "base_port": 5005, "num_envs": 1, "num_areas": 1, "timeout_wait": 60, "seed": 451, "max_lifetime_restarts": 10, "restarts_rate_limit_n": 1, "restarts_rate_limit_period_s": 60}, "engine_settings": {"width": 84, "height": 84, "quality_level": 5, "time_scale": 20, "target_frame_rate": -1, "capture_frame_rate": 60, "no_graphics": false, "no_graphics_monitor": false}, "environment_parameters": null, "checkpoint_settings": {"run_id": "ToucanHush-lr1e-4-bs512-seed451", "initialize_from": null, "load_model": false, "resume": false, "force": true, "train_model": false, "inference": false, "results_dir": "results"}, "torch_settings": {"device": null}, "debug": false}

configuration.yaml CHANGED Viewed

@@ -51,7 +51,7 @@ env_settings:
   num_envs: 1
   num_areas: 1
   timeout_wait: 60
-  seed: -1
   max_lifetime_restarts: 10
   restarts_rate_limit_n: 1
   restarts_rate_limit_period_s: 60
@@ -66,11 +66,11 @@ engine_settings:
   no_graphics_monitor: false
 environment_parameters: null
 checkpoint_settings:
-  run_id: ToucanHush-lr1e-4-bs512
   initialize_from: null
   load_model: false
-  resume: true
-  force: false
   train_model: false
   inference: false
   results_dir: results

   num_envs: 1
   num_areas: 1
   timeout_wait: 60
+  seed: 451
   max_lifetime_restarts: 10
   restarts_rate_limit_n: 1
   restarts_rate_limit_period_s: 60
   no_graphics_monitor: false
 environment_parameters: null
 checkpoint_settings:
+  run_id: ToucanHush-lr1e-4-bs512-seed451
   initialize_from: null
   load_model: false
+  resume: false
+  force: true
   train_model: false
   inference: false
   results_dir: results

run_logs/timers.json CHANGED Viewed

@@ -2,257 +2,257 @@
     "name": "root",
     "gauges": {
         "AimAndThrow.Policy.Entropy.mean": {
-            "value": 1.3892475366592407,
-            "min": 1.3832285404205322,
-            "max": 1.4470560550689697,
-            "count": 10
         },
         "AimAndThrow.Policy.Entropy.sum": {
-            "value": 278138.46875,
-            "min": 276291.59375,
-            "max": 292536.84375,
-            "count": 10
         },
         "AimAndThrow.Environment.EpisodeLength.mean": {
-            "value": 170.75536480686696,
-            "min": 168.5284139100933,
-            "max": 180.49364791288565,
-            "count": 10
         },
         "AimAndThrow.Environment.EpisodeLength.sum": {
-            "value": 198930.0,
-            "min": 198649.0,
-            "max": 199080.0,
-            "count": 10
         },
         "AimAndThrow.Step.mean": {
-            "value": 4999907.0,
-            "min": 3199911.0,
-            "max": 4999907.0,
-            "count": 10
         },
         "AimAndThrow.Step.sum": {
-            "value": 4999907.0,
-            "min": 3199911.0,
-            "max": 4999907.0,
-            "count": 10
         },
         "AimAndThrow.Policy.ExtrinsicValueEstimate.mean": {
-            "value": -0.09314975887537003,
-            "min": -0.12198472023010254,
-            "max": -0.08240056782960892,
-            "count": 10
         },
         "AimAndThrow.Policy.ExtrinsicValueEstimate.sum": {
-            "value": -108.51947021484375,
-            "min": -134.30517578125,
-            "max": -97.1502685546875,
-            "count": 10
         },
         "AimAndThrow.Environment.CumulativeReward.mean": {
-            "value": -0.2722077760061993,
-            "min": -0.3344314770737526,
-            "max": -0.2292960595538598,
-            "count": 10
         },
         "AimAndThrow.Environment.CumulativeReward.sum": {
-            "value": -317.12205904722214,
-            "min": -368.2090562582016,
-            "max": -270.3400542140007,
-            "count": 10
         },
         "AimAndThrow.Policy.ExtrinsicReward.mean": {
-            "value": -0.2722077760061993,
-            "min": -0.3344314770737526,
-            "max": -0.2292960595538598,
-            "count": 10
         },
         "AimAndThrow.Policy.ExtrinsicReward.sum": {
-            "value": -317.12205904722214,
-            "min": -368.2090562582016,
-            "max": -270.3400542140007,
-            "count": 10
         },
         "AimAndThrow.Losses.PolicyLoss.mean": {
-            "value": 0.03534654652078947,
-            "min": 0.034045236336338956,
-            "max": 0.036389124503152244,
-            "count": 10
         },
         "AimAndThrow.Losses.PolicyLoss.sum": {
-            "value": 0.10603963956236839,
-            "min": 0.10603963956236839,
-            "max": 0.14457743012268714,
-            "count": 10
         },
         "AimAndThrow.Losses.ValueLoss.mean": {
-            "value": 0.04565222833719518,
-            "min": 0.042183469543233515,
-            "max": 0.04730776108801364,
-            "count": 10
         },
         "AimAndThrow.Losses.ValueLoss.sum": {
-            "value": 0.13695668501158553,
-            "min": 0.12860161892448863,
-            "max": 0.18923104435205457,
-            "count": 10
         },
         "AimAndThrow.Policy.LearningRate.mean": {
-            "value": 2.0405779595199996e-06,
-            "min": 2.0405779595199996e-06,
-            "max": 3.79480287187e-05,
-            "count": 10
         },
         "AimAndThrow.Policy.LearningRate.sum": {
-            "value": 6.121733878559999e-06,
-            "min": 6.121733878559999e-06,
-            "max": 0.00013741992258034,
-            "count": 10
         },
         "AimAndThrow.Policy.Epsilon.mean": {
-            "value": 0.10204048000000003,
-            "min": 0.10204048000000003,
-            "max": 0.13794796666666667,
-            "count": 10
         },
         "AimAndThrow.Policy.Epsilon.sum": {
-            "value": 0.3061214400000001,
-            "min": 0.3061214400000001,
-            "max": 0.5374196600000001,
-            "count": 10
         },
         "AimAndThrow.Policy.Beta.mean": {
-            "value": 3.0200752000000003e-05,
-            "min": 3.0200752000000003e-05,
-            "max": 0.00038568487000000006,
-            "count": 10
         },
         "AimAndThrow.Policy.Beta.sum": {
-            "value": 9.0602256e-05,
-            "min": 9.0602256e-05,
-            "max": 0.001400454634,
-            "count": 10
         },
         "AimAndThrow.IsTraining.mean": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
-            "count": 10
         },
         "AimAndThrow.IsTraining.sum": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
-            "count": 10
         }
     },
     "metadata": {
         "timer_format_version": "0.1.0",
-        "start_time_seconds": "1757016029",
         "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr  5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]",
-        "command_line_arguments": "D:\\ml-agents\\venv\\Scripts\\mlagents-learn ../../config/AimAndThrow.yaml --resume --run-id=ToucanHush-lr1e-4-bs512",
         "mlagents_version": "1.2.0.dev0",
         "mlagents_envs_version": "1.2.0.dev0",
         "communication_protocol_version": "1.5.0",
         "pytorch_version": "2.8.0+cu129",
         "numpy_version": "1.23.5",
-        "end_time_seconds": "1757018343"
     },
-    "total": 2314.082258899987,
     "count": 1,
-    "self": 0.006703999970341101,
     "children": {
         "run_training.setup": {
-            "total": 0.1290828000055626,
             "count": 1,
-            "self": 0.1290828000055626
         },
         "TrainerController.start_learning": {
-            "total": 2313.9464721000113,
             "count": 1,
-            "self": 2.3979349007859128,
             "children": {
                 "TrainerController._reset_env": {
-                    "total": 18.20734400000947,
                     "count": 1,
-                    "self": 18.20734400000947
                 },
                 "TrainerController.advance": {
-                    "total": 2293.2781405992137,
-                    "count": 133585,
-                    "self": 2.3464309999835677,
                     "children": {
                         "env_step": {
-                            "total": 1801.8451803984935,
-                            "count": 133585,
-                            "self": 1245.4353575954156,
                             "children": {
                                 "SubprocessEnvManager._take_step": {
-                                    "total": 554.7722125035652,
-                                    "count": 133585,
-                                    "self": 7.100844602406141,
                                     "children": {
                                         "TorchPolicy.evaluate": {
-                                            "total": 547.6713679011591,
-                                            "count": 125119,
-                                            "self": 547.6713679011591
                                         }
                                     }
                                 },
                                 "workers": {
-                                    "total": 1.637610299512744,
-                                    "count": 133585,
                                     "self": 0.0,
                                     "children": {
                                         "worker_root": {
-                                            "total": 2293.928678000215,
-                                            "count": 133585,
                                             "is_parallel": true,
-                                            "self": 1206.7310486966308,
                                             "children": {
                                                 "steps_from_proto": {
-                                                    "total": 0.0004230999911669642,
                                                     "count": 1,
                                                     "is_parallel": true,
-                                                    "self": 0.00023099998361431062,
                                                     "children": {
                                                         "_process_rank_one_or_two_observation": {
-                                                            "total": 0.00019210000755265355,
                                                             "count": 2,
                                                             "is_parallel": true,
-                                                            "self": 0.00019210000755265355
                                                         }
                                                     }
                                                 },
                                                 "UnityEnvironment.step": {
-                                                    "total": 1087.197206203593,
-                                                    "count": 133585,
                                                     "is_parallel": true,
-                                                    "self": 14.519432301283814,
                                                     "children": {
                                                         "UnityEnvironment._generate_step_input": {
-                                                            "total": 23.934972499875585,
-                                                            "count": 133585,
                                                             "is_parallel": true,
-                                                            "self": 23.934972499875585
                                                         },
                                                         "communicator.exchange": {
-                                                            "total": 1011.1638868003356,
-                                                            "count": 133585,
                                                             "is_parallel": true,
-                                                            "self": 1011.1638868003356
                                                         },
                                                         "steps_from_proto": {
-                                                            "total": 37.57891460209794,
-                                                            "count": 133585,
                                                             "is_parallel": true,
-                                                            "self": 19.125529006996658,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
-                                                                    "total": 18.45338559510128,
-                                                                    "count": 267170,
                                                                     "is_parallel": true,
-                                                                    "self": 18.45338559510128
                                                                 }
                                                             }
                                                         }
@@ -265,31 +265,31 @@
                             }
                         },
                         "trainer_advance": {
-                            "total": 489.08652920073655,
-                            "count": 133585,
-                            "self": 4.662728499126388,
                             "children": {
                                 "process_trajectory": {
-                                    "total": 115.56687550162314,
-                                    "count": 133585,
-                                    "self": 115.2692794016184,
                                     "children": {
                                         "RLTrainer._checkpoint": {
-                                            "total": 0.29759610000473913,
-                                            "count": 4,
-                                            "self": 0.29759610000473913
                                         }
                                     }
                                 },
                                 "_update_policy": {
-                                    "total": 368.856925199987,
-                                    "count": 38,
-                                    "self": 199.19484069995815,
                                     "children": {
                                         "TorchPPOOptimizer.update": {
-                                            "total": 169.66208450002887,
-                                            "count": 11400,
-                                            "self": 169.66208450002887
                                         }
                                     }
                                 }
@@ -298,19 +298,19 @@
                     }
                 },
                 "trainer_threads": {
-                    "total": 7.00005330145359e-07,
                     "count": 1,
-                    "self": 7.00005330145359e-07
                 },
                 "TrainerController._save_models": {
-                    "total": 0.06305189999693539,
                     "count": 1,
-                    "self": 0.007433899998432025,
                     "children": {
                         "RLTrainer._checkpoint": {
-                            "total": 0.055617999998503365,
                             "count": 1,
-                            "self": 0.055617999998503365
                         }
                     }
                 }

     "name": "root",
     "gauges": {
         "AimAndThrow.Policy.Entropy.mean": {
+            "value": 0.459451287984848,
+            "min": 0.459451287984848,
+            "max": 1.78365159034729,
+            "count": 25
         },
         "AimAndThrow.Policy.Entropy.sum": {
+            "value": 92037.28125,
+            "min": 92037.28125,
+            "max": 360326.15625,
+            "count": 25
         },
         "AimAndThrow.Environment.EpisodeLength.mean": {
+            "value": 85.02537634408603,
+            "min": 85.02537634408603,
+            "max": 220.64745011086475,
+            "count": 25
         },
         "AimAndThrow.Environment.EpisodeLength.sum": {
+            "value": 197684.0,
+            "min": 197684.0,
+            "max": 199158.0,
+            "count": 25
         },
         "AimAndThrow.Step.mean": {
+            "value": 4999944.0,
+            "min": 199987.0,
+            "max": 4999944.0,
+            "count": 25
         },
         "AimAndThrow.Step.sum": {
+            "value": 4999944.0,
+            "min": 199987.0,
+            "max": 4999944.0,
+            "count": 25
         },
         "AimAndThrow.Policy.ExtrinsicValueEstimate.mean": {
+            "value": 0.3502625524997711,
+            "min": -0.361602783203125,
+            "max": 0.3502625524997711,
+            "count": 25
         },
         "AimAndThrow.Policy.ExtrinsicValueEstimate.sum": {
+            "value": 814.0101928710938,
+            "min": -326.16571044921875,
+            "max": 814.0101928710938,
+            "count": 25
         },
         "AimAndThrow.Environment.CumulativeReward.mean": {
+            "value": 0.6396350792501757,
+            "min": -1.1251290707282346,
+            "max": 0.6396350792501757,
+            "count": 25
         },
         "AimAndThrow.Environment.CumulativeReward.sum": {
+            "value": 1486.5119241774082,
+            "min": -1499.797051280737,
+            "max": 1486.5119241774082,
+            "count": 25
         },
         "AimAndThrow.Policy.ExtrinsicReward.mean": {
+            "value": 0.6396350792501757,
+            "min": -1.1251290707282346,
+            "max": 0.6396350792501757,
+            "count": 25
         },
         "AimAndThrow.Policy.ExtrinsicReward.sum": {
+            "value": 1486.5119241774082,
+            "min": -1499.797051280737,
+            "max": 1486.5119241774082,
+            "count": 25
         },
         "AimAndThrow.Losses.PolicyLoss.mean": {
+            "value": 0.03416703626164235,
+            "min": 0.034137498232303186,
+            "max": 0.03677766110825663,
+            "count": 25
         },
         "AimAndThrow.Losses.PolicyLoss.sum": {
+            "value": 0.1366681450465694,
+            "min": 0.10485064039084441,
+            "max": 0.14711064443302652,
+            "count": 25
         },
         "AimAndThrow.Losses.ValueLoss.mean": {
+            "value": 0.04043484590481967,
+            "min": 0.020302016613228866,
+            "max": 0.05864293771175047,
+            "count": 25
         },
         "AimAndThrow.Losses.ValueLoss.sum": {
+            "value": 0.16173938361927867,
+            "min": 0.08120806645291546,
+            "max": 0.23457175084700188,
+            "count": 25
         },
         "AimAndThrow.Policy.LearningRate.mean": {
+            "value": 1.9945730055250013e-06,
+            "min": 1.9945730055250013e-06,
+            "max": 9.794401538932e-05,
+            "count": 25
         },
         "AimAndThrow.Policy.LearningRate.sum": {
+            "value": 7.978292022100005e-06,
+            "min": 7.978292022100005e-06,
+            "max": 0.0003773958226042,
+            "count": 25
         },
         "AimAndThrow.Policy.Epsilon.mean": {
+            "value": 0.10199447500000003,
+            "min": 0.10199447500000003,
+            "max": 0.19794401333333336,
+            "count": 25
         },
         "AimAndThrow.Policy.Epsilon.sum": {
+            "value": 0.4079779000000001,
+            "min": 0.36600964,
+            "max": 0.7773957999999999,
+            "count": 25
         },
         "AimAndThrow.Policy.Beta.mean": {
+            "value": 2.9745302500000005e-05,
+            "min": 2.9745302500000005e-05,
+            "max": 0.0009796457320000002,
+            "count": 25
         },
         "AimAndThrow.Policy.Beta.sum": {
+            "value": 0.00011898121000000002,
+            "min": 0.00011898121000000002,
+            "max": 0.0037762184200000005,
+            "count": 25
         },
         "AimAndThrow.IsTraining.mean": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
+            "count": 25
         },
         "AimAndThrow.IsTraining.sum": {
             "value": 1.0,
             "min": 1.0,
             "max": 1.0,
+            "count": 25
         }
     },
     "metadata": {
         "timer_format_version": "0.1.0",
+        "start_time_seconds": "1757063460",
         "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr  5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]",
+        "command_line_arguments": "D:\\ml-agents\\venv\\Scripts\\mlagents-learn ../../config/AimAndThrow.yaml --run-id=ToucanHush-lr1e-4-bs512-seed451 --seed=451 --force",
         "mlagents_version": "1.2.0.dev0",
         "mlagents_envs_version": "1.2.0.dev0",
         "communication_protocol_version": "1.5.0",
         "pytorch_version": "2.8.0+cu129",
         "numpy_version": "1.23.5",
+        "end_time_seconds": "1757070020"
     },
+    "total": 6560.5212958,
     "count": 1,
+    "self": 0.007863500000894419,
     "children": {
         "run_training.setup": {
+            "total": 0.08756299999998873,
             "count": 1,
+            "self": 0.08756299999998873
         },
         "TrainerController.start_learning": {
+            "total": 6560.425869299999,
             "count": 1,
+            "self": 7.095670400263771,
             "children": {
                 "TrainerController._reset_env": {
+                    "total": 20.534740399999976,
                     "count": 1,
+                    "self": 20.534740399999976
                 },
                 "TrainerController.advance": {
+                    "total": 6532.7125493997355,
+                    "count": 340583,
+                    "self": 7.2223678995742375,
                     "children": {
                         "env_step": {
+                            "total": 5119.653573500246,
+                            "count": 340583,
+                            "self": 3649.4608105005127,
                             "children": {
                                 "SubprocessEnvManager._take_step": {
+                                    "total": 1465.4144640998343,
+                                    "count": 340583,
+                                    "self": 21.73783139998841,
                                     "children": {
                                         "TorchPolicy.evaluate": {
+                                            "total": 1443.6766326998459,
+                                            "count": 312572,
+                                            "self": 1443.6766326998459
                                         }
                                     }
                                 },
                                 "workers": {
+                                    "total": 4.778298899899255,
+                                    "count": 340583,
                                     "self": 0.0,
                                     "children": {
                                         "worker_root": {
+                                            "total": 6533.0349240996875,
+                                            "count": 340583,
                                             "is_parallel": true,
+                                            "self": 3347.4401770996496,
                                             "children": {
                                                 "steps_from_proto": {
+                                                    "total": 0.0006743000000142274,
                                                     "count": 1,
                                                     "is_parallel": true,
+                                                    "self": 0.0003619000000298911,
                                                     "children": {
                                                         "_process_rank_one_or_two_observation": {
+                                                            "total": 0.0003123999999843363,
                                                             "count": 2,
                                                             "is_parallel": true,
+                                                            "self": 0.0003123999999843363
                                                         }
                                                     }
                                                 },
                                                 "UnityEnvironment.step": {
+                                                    "total": 3185.594072700038,
+                                                    "count": 340583,
                                                     "is_parallel": true,
+                                                    "self": 41.29519420048928,
                                                     "children": {
                                                         "UnityEnvironment._generate_step_input": {
+                                                            "total": 68.51995659977729,
+                                                            "count": 340583,
                                                             "is_parallel": true,
+                                                            "self": 68.51995659977729
                                                         },
                                                         "communicator.exchange": {
+                                                            "total": 2971.9692622997804,
+                                                            "count": 340583,
                                                             "is_parallel": true,
+                                                            "self": 2971.9692622997804
                                                         },
                                                         "steps_from_proto": {
+                                                            "total": 103.80965959999088,
+                                                            "count": 340583,
                                                             "is_parallel": true,
+                                                            "self": 53.13167199986731,
                                                             "children": {
                                                                 "_process_rank_one_or_two_observation": {
+                                                                    "total": 50.67798760012357,
+                                                                    "count": 681166,
                                                                     "is_parallel": true,
+                                                                    "self": 50.67798760012357
                                                                 }
                                                             }
                                                         }
                             }
                         },
                         "trainer_advance": {
+                            "total": 1405.8366079999153,
+                            "count": 340583,
+                            "self": 12.847537699994746,
                             "children": {
                                 "process_trajectory": {
+                                    "total": 370.5619708999176,
+                                    "count": 340583,
+                                    "self": 369.56727029991805,
                                     "children": {
                                         "RLTrainer._checkpoint": {
+                                            "total": 0.9947005999995326,
+                                            "count": 10,
+                                            "self": 0.9947005999995326
                                         }
                                     }
                                 },
                                 "_update_policy": {
+                                    "total": 1022.427099400003,
+                                    "count": 97,
+                                    "self": 550.2304691000079,
                                     "children": {
                                         "TorchPPOOptimizer.update": {
+                                            "total": 472.19663029999515,
+                                            "count": 29100,
+                                            "self": 472.19663029999515
                                         }
                                     }
                                 }
                     }
                 },
                 "trainer_threads": {
+                    "total": 6.000000212225132e-07,
                     "count": 1,
+                    "self": 6.000000212225132e-07
                 },
                 "TrainerController._save_models": {
+                    "total": 0.08290849999957572,
                     "count": 1,
+                    "self": 0.01336419999915961,
                     "children": {
                         "RLTrainer._checkpoint": {
+                            "total": 0.06954430000041611,
                             "count": 1,
+                            "self": 0.06954430000041611
                         }
                     }
                 }

run_logs/training_status.json CHANGED Viewed

@@ -2,103 +2,103 @@
     "AimAndThrow": {
         "checkpoints": [
             {
-                "steps": 1999842,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-1999842.onnx",
-                "reward": -0.32050378287016457,
-                "creation_time": 1757014111.006714,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-1999842.pt"
                 ]
             },
             {
-                "steps": 2499936,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2499936.onnx",
-                "reward": -0.25919144333264477,
-                "creation_time": 1757014766.098421,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2499936.pt"
                 ]
             },
             {
-                "steps": 2999763,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2999763.onnx",
-                "reward": -0.3543306884746398,
-                "creation_time": 1757015392.6672244,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-2999763.pt"
                 ]
             },
             {
-                "steps": 3000013,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.onnx",
-                "reward": -0.35946404337883,
-                "creation_time": 1757015392.7439406,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.pt"
                 ]
             },
             {
-                "steps": 3000013,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.onnx",
-                "reward": null,
-                "creation_time": 1757016009.2596812,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3000013.pt"
                 ]
             },
             {
-                "steps": 3499965,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3499965.onnx",
-                "reward": -0.2504000486298041,
-                "creation_time": 1757016617.46741,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3499965.pt"
                 ]
             },
             {
-                "steps": 3999938,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3999938.onnx",
-                "reward": -0.3533456439481062,
-                "creation_time": 1757017200.9901583,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-3999938.pt"
                 ]
             },
             {
-                "steps": 4499955,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4499955.onnx",
-                "reward": -0.29398596328748783,
-                "creation_time": 1757017777.2994728,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4499955.pt"
                 ]
             },
             {
-                "steps": 4999907,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4999907.onnx",
-                "reward": -0.31742338289817174,
-                "creation_time": 1757018343.732425,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-4999907.pt"
                 ]
             },
             {
-                "steps": 5000142,
-                "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-5000142.onnx",
-                "reward": -0.3161561958417544,
-                "creation_time": 1757018343.7977736,
                 "auxillary_file_paths": [
-                    "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-5000142.pt"
                 ]
             }
         ],
         "final_checkpoint": {
-            "steps": 5000142,
-            "file_path": "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow.onnx",
-            "reward": -0.3161561958417544,
-            "creation_time": 1757018343.7977736,
             "auxillary_file_paths": [
-                "results\\ToucanHush-lr1e-4-bs512\\AimAndThrow\\AimAndThrow-5000142.pt"
             ]
         }
     },

     "AimAndThrow": {
         "checkpoints": [
             {
+                "steps": 999996,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-999996.onnx",
+                "reward": -0.5190732231954249,
+                "creation_time": 1757064777.69313,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-999996.pt"
                 ]
             },
             {
+                "steps": 1499763,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1499763.onnx",
+                "reward": -0.5913965953835125,
+                "creation_time": 1757065426.7471402,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1499763.pt"
                 ]
             },
             {
+                "steps": 1999828,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1999828.onnx",
+                "reward": -0.037763210601712525,
+                "creation_time": 1757066065.8641207,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-1999828.pt"
                 ]
             },
             {
+                "steps": 2499893,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2499893.onnx",
+                "reward": 0.3667246930898274,
+                "creation_time": 1757066714.01822,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2499893.pt"
                 ]
             },
             {
+                "steps": 2999898,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2999898.onnx",
+                "reward": 0.4242128275408603,
+                "creation_time": 1757067357.4376752,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-2999898.pt"
                 ]
             },
             {
+                "steps": 3499955,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3499955.onnx",
+                "reward": 0.5942972659527719,
+                "creation_time": 1757068020.0002558,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3499955.pt"
                 ]
             },
             {
+                "steps": 3999903,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3999903.onnx",
+                "reward": 0.6044068104187346,
+                "creation_time": 1757068694.08997,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-3999903.pt"
                 ]
             },
             {
+                "steps": 4499878,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4499878.onnx",
+                "reward": 0.6381313523091829,
+                "creation_time": 1757069358.1493115,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4499878.pt"
                 ]
             },
             {
+                "steps": 4999944,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4999944.onnx",
+                "reward": 0.6078923956094036,
+                "creation_time": 1757070020.4738462,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-4999944.pt"
                 ]
             },
             {
+                "steps": 5000085,
+                "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-5000085.onnx",
+                "reward": 0.6087707160961016,
+                "creation_time": 1757070020.5539749,
                 "auxillary_file_paths": [
+                    "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-5000085.pt"
                 ]
             }
         ],
         "final_checkpoint": {
+            "steps": 5000085,
+            "file_path": "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow.onnx",
+            "reward": 0.6087707160961016,
+            "creation_time": 1757070020.5539749,
             "auxillary_file_paths": [
+                "results\\ToucanHush-lr1e-4-bs512-seed451\\AimAndThrow\\AimAndThrow-5000085.pt"
             ]
         }
     },