Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use gmalivenko/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use gmalivenko/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="gmalivenko/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.5026072859764099, | |
| "min": 0.5026072859764099, | |
| "max": 1.4436146020889282, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 15110.3857421875, | |
| "min": 15110.3857421875, | |
| "max": 43793.4921875, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 989927.0, | |
| "min": 29942.0, | |
| "max": 989927.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 989927.0, | |
| "min": 29942.0, | |
| "max": 989927.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.4582596719264984, | |
| "min": -0.10647869855165482, | |
| "max": 0.46304410696029663, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 124.18836975097656, | |
| "min": -25.554887771606445, | |
| "max": 124.55886840820312, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.01799023523926735, | |
| "min": -0.03115887939929962, | |
| "max": 0.5443099141120911, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 4.875353813171387, | |
| "min": -8.381738662719727, | |
| "max": 129.00144958496094, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.07030688133852979, | |
| "min": 0.06439152579353986, | |
| "max": 0.07281681260649171, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.984296338739417, | |
| "min": 0.509717688245442, | |
| "max": 1.0233207552761694, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.016910908521995657, | |
| "min": 3.6694227394499604e-05, | |
| "max": 0.016910908521995657, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.2367527193079392, | |
| "min": 0.0005137191835229945, | |
| "max": 0.25074333061125154, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 7.322304702121432e-06, | |
| "min": 7.322304702121432e-06, | |
| "max": 0.0002952333444460286, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.00010251226582970005, | |
| "min": 0.00010251226582970005, | |
| "max": 0.0035091131302956995, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10244073571428569, | |
| "min": 0.10244073571428569, | |
| "max": 0.19841111428571429, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4341702999999997, | |
| "min": 1.3888778, | |
| "max": 2.5697043000000006, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.00025382949785714295, | |
| "min": 0.00025382949785714295, | |
| "max": 0.009841270317142856, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.003553612970000001, | |
| "min": 0.003553612970000001, | |
| "max": 0.11699345957, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.012287263758480549, | |
| "min": 0.011541834101080894, | |
| "max": 0.5963875651359558, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.17202168703079224, | |
| "min": 0.16158567368984222, | |
| "max": 4.174713134765625, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 378.7901234567901, | |
| "min": 378.7901234567901, | |
| "max": 999.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 30682.0, | |
| "min": 16613.0, | |
| "max": 33111.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.5470987463622918, | |
| "min": -0.999987552408129, | |
| "max": 1.5470987463622918, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 125.31499845534563, | |
| "min": -31.999601677060127, | |
| "max": 125.31499845534563, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.5470987463622918, | |
| "min": -0.999987552408129, | |
| "max": 1.5470987463622918, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 125.31499845534563, | |
| "min": -31.999601677060127, | |
| "max": 125.31499845534563, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.04873414869193149, | |
| "min": 0.04873414869193149, | |
| "max": 12.113140689099536, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 3.9474660440464504, | |
| "min": 3.529839080001693, | |
| "max": 205.92339171469212, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1777799116", | |
| "python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]", | |
| "command_line_arguments": "/home/user/miniforge3/envs/py31012/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1777800086" | |
| }, | |
| "total": 970.7906739129976, | |
| "count": 1, | |
| "self": 0.31871445900105755, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.013402051998127718, | |
| "count": 1, | |
| "self": 0.013402051998127718 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 970.4585574019984, | |
| "count": 1, | |
| "self": 1.1806836719842977, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 1.4449513839972496, | |
| "count": 1, | |
| "self": 1.4449513839972496 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 967.8050073450177, | |
| "count": 63584, | |
| "self": 1.2481639090037788, | |
| "children": { | |
| "env_step": { | |
| "total": 603.1725988958715, | |
| "count": 63584, | |
| "self": 540.4988386426339, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 61.8649808001137, | |
| "count": 63584, | |
| "self": 2.765883009200479, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 59.09909779091322, | |
| "count": 62561, | |
| "self": 59.09909779091322 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.8087794531238615, | |
| "count": 63584, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 968.9295175952648, | |
| "count": 63584, | |
| "is_parallel": true, | |
| "self": 493.7550299362083, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0010336509985791054, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00033265700039919466, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0007009939981799107, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0007009939981799107 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.027043032001529355, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0002582729975983966, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.00022782300220569596, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00022782300220569596 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.025780371001019375, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.025780371001019375 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0007765650007058866, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00019709200205397792, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0005794729986519087, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0005794729986519087 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 475.17448765905647, | |
| "count": 63583, | |
| "is_parallel": true, | |
| "self": 15.020436929426069, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 10.360689733675827, | |
| "count": 63583, | |
| "is_parallel": true, | |
| "self": 10.360689733675827 | |
| }, | |
| "communicator.exchange": { | |
| "total": 407.7471936863258, | |
| "count": 63583, | |
| "is_parallel": true, | |
| "self": 407.7471936863258 | |
| }, | |
| "steps_from_proto": { | |
| "total": 42.0461673096288, | |
| "count": 63583, | |
| "is_parallel": true, | |
| "self": 10.174381445202016, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 31.871785864426784, | |
| "count": 508664, | |
| "is_parallel": true, | |
| "self": 31.871785864426784 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 363.3842445401424, | |
| "count": 63584, | |
| "self": 2.3287487244961085, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 68.82358752960499, | |
| "count": 63584, | |
| "self": 68.75443767860634, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.06914985099865589, | |
| "count": 2, | |
| "self": 0.06914985099865589 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 292.2319082860413, | |
| "count": 454, | |
| "self": 149.69308987889235, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 142.53881840714894, | |
| "count": 22803, | |
| "self": 142.53881840714894 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 9.779978427104652e-07, | |
| "count": 1, | |
| "self": 9.779978427104652e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.027914023001358146, | |
| "count": 1, | |
| "self": 0.0005242289989837445, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.0273897940023744, | |
| "count": 1, | |
| "self": 0.0273897940023744 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |