Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use Dash10107/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use Dash10107/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="Dash10107/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.13279588520526886, | |
| "min": 0.1276216357946396, | |
| "max": 1.5100661516189575, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 3981.751953125, | |
| "min": 3843.506103515625, | |
| "max": 45809.3671875, | |
| "count": 100 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 2999955.0, | |
| "min": 29952.0, | |
| "max": 2999955.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 2999955.0, | |
| "min": 29952.0, | |
| "max": 2999955.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.7765174508094788, | |
| "min": -0.12612566351890564, | |
| "max": 0.8122166991233826, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 232.95523071289062, | |
| "min": -29.891782760620117, | |
| "max": 245.11643981933594, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.00495332945138216, | |
| "min": -0.018807025626301765, | |
| "max": 0.4251170754432678, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 1.4859988689422607, | |
| "min": -5.529265403747559, | |
| "max": 100.75274658203125, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.0669019984225521, | |
| "min": 0.06402619248408535, | |
| "max": 0.07364090123179513, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9366279779157293, | |
| "min": 0.515486308622566, | |
| "max": 1.0730021896888502, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.015109357492682258, | |
| "min": 0.0008536211755744702, | |
| "max": 0.01724082210369358, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.2115310048975516, | |
| "min": 0.010243454106893642, | |
| "max": 0.24137150945171013, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 1.468999510366665e-06, | |
| "min": 1.468999510366665e-06, | |
| "max": 0.00029838354339596195, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 2.0565993145133312e-05, | |
| "min": 2.0565993145133312e-05, | |
| "max": 0.0039693559768813665, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10048963333333334, | |
| "min": 0.10048963333333334, | |
| "max": 0.19946118095238097, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4068548666666667, | |
| "min": 1.3962282666666668, | |
| "max": 2.7674477333333334, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 5.891436999999994e-05, | |
| "min": 5.891436999999994e-05, | |
| "max": 0.009946171977142856, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.0008248011799999992, | |
| "min": 0.0008248011799999992, | |
| "max": 0.13231955147000002, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.006202308926731348, | |
| "min": 0.006202308926731348, | |
| "max": 0.6022911667823792, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.08683232218027115, | |
| "min": 0.08683232218027115, | |
| "max": 4.216038227081299, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 238.46153846153845, | |
| "min": 216.85384615384615, | |
| "max": 999.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 31000.0, | |
| "min": 15984.0, | |
| "max": 32696.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.7457813864299494, | |
| "min": -1.0000000521540642, | |
| "max": 1.7707822481470723, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 225.20579884946346, | |
| "min": -30.72800175845623, | |
| "max": 236.1653986275196, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.7457813864299494, | |
| "min": -1.0000000521540642, | |
| "max": 1.7707822481470723, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 225.20579884946346, | |
| "min": -30.72800175845623, | |
| "max": 236.1653986275196, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.015671909666397293, | |
| "min": 0.015229424077439874, | |
| "max": 12.687960926443338, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 2.0216763469652506, | |
| "min": 1.952062553173164, | |
| "max": 203.00737482309341, | |
| "count": 100 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 100 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1775916546", | |
| "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]", | |
| "command_line_arguments": "/usr/local/envs/mlagents_env/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1775923911" | |
| }, | |
| "total": 7365.131160466, | |
| "count": 1, | |
| "self": 0.7075720490011008, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.02576448799982245, | |
| "count": 1, | |
| "self": 0.02576448799982245 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 7364.397823928999, | |
| "count": 1, | |
| "self": 4.644521037062077, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.3667731900000035, | |
| "count": 1, | |
| "self": 2.3667731900000035 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 7357.300293933937, | |
| "count": 194544, | |
| "self": 4.702609164880414, | |
| "children": { | |
| "env_step": { | |
| "total": 5312.994435509045, | |
| "count": 194544, | |
| "self": 4822.917501759671, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 487.21450290226676, | |
| "count": 194544, | |
| "self": 14.572083858023689, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 472.64241904424307, | |
| "count": 187557, | |
| "self": 472.64241904424307 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 2.8624308471071345, | |
| "count": 194544, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 7344.031394370102, | |
| "count": 194544, | |
| "is_parallel": true, | |
| "self": 2902.051859378209, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0017463559997850098, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.000578920999487309, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0011674350002977008, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0011674350002977008 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.05433119600002101, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005415370001173869, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.00046252099991761497, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00046252099991761497 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.05154338200009079, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.05154338200009079 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0017837559998952202, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0003848189999189344, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013989369999762857, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013989369999762857 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 4441.979534991893, | |
| "count": 194543, | |
| "is_parallel": true, | |
| "self": 103.544239829851, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 71.26049822862024, | |
| "count": 194543, | |
| "is_parallel": true, | |
| "self": 71.26049822862024 | |
| }, | |
| "communicator.exchange": { | |
| "total": 3930.9001369647294, | |
| "count": 194543, | |
| "is_parallel": true, | |
| "self": 3930.9001369647294 | |
| }, | |
| "steps_from_proto": { | |
| "total": 336.27465996869296, | |
| "count": 194543, | |
| "is_parallel": true, | |
| "self": 72.61463308996554, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 263.6600268787274, | |
| "count": 1556344, | |
| "is_parallel": true, | |
| "self": 263.6600268787274 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 2039.6032492600111, | |
| "count": 194544, | |
| "self": 9.248691783554477, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 391.15998838244104, | |
| "count": 194544, | |
| "self": 390.62292449844153, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.5370638839995081, | |
| "count": 6, | |
| "self": 0.5370638839995081 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 1639.1945690940156, | |
| "count": 1396, | |
| "self": 905.6089767299436, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 733.585592364072, | |
| "count": 68421, | |
| "self": 733.585592364072 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.3030003174208105e-06, | |
| "count": 1, | |
| "self": 1.3030003174208105e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.0862344649995066, | |
| "count": 1, | |
| "self": 0.001065538999682758, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.08516892599982384, | |
| "count": 1, | |
| "self": 0.08516892599982384 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |