Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use pardee-wal/ppo-Pyramid with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use pardee-wal/ppo-Pyramid with ml-agents:
mlagents-load-from-hf --repo-id="pardee-wal/ppo-Pyramid" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.23216168582439423, | |
| "min": 0.22822752594947815, | |
| "max": 1.4686609506607056, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 6968.56494140625, | |
| "min": 6872.38720703125, | |
| "max": 44553.296875, | |
| "count": 68 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 2039939.0, | |
| "min": 29952.0, | |
| "max": 2039939.0, | |
| "count": 68 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 2039939.0, | |
| "min": 29952.0, | |
| "max": 2039939.0, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.8554800152778625, | |
| "min": -0.10093619674444199, | |
| "max": 0.8554800152778625, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 256.6440124511719, | |
| "min": -24.426559448242188, | |
| "max": 256.6440124511719, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.0081409877166152, | |
| "min": -0.03461739420890808, | |
| "max": 0.2912626266479492, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 2.442296266555786, | |
| "min": -9.935192108154297, | |
| "max": 70.19429016113281, | |
| "count": 68 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06831075437159077, | |
| "min": 0.06329921378831689, | |
| "max": 0.07736064368024621, | |
| "count": 68 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9563505612022708, | |
| "min": 0.49836566674304944, | |
| "max": 1.083049011523447, | |
| "count": 68 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.013814210268763073, | |
| "min": 0.0008615905706710742, | |
| "max": 0.01700291723967644, | |
| "count": 68 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.19339894376268302, | |
| "min": 0.011200677418723964, | |
| "max": 0.23804084135547018, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 9.75223603497119e-05, | |
| "min": 9.75223603497119e-05, | |
| "max": 0.00029838354339596195, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.0013653130448959664, | |
| "min": 0.0013653130448959664, | |
| "max": 0.00392742009086, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.13250743095238096, | |
| "min": 0.13250743095238096, | |
| "max": 0.19946118095238097, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.8551040333333333, | |
| "min": 1.3962282666666668, | |
| "max": 2.7091399999999997, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.0032574923521428573, | |
| "min": 0.0032574923521428573, | |
| "max": 0.009946171977142856, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.04560489293, | |
| "min": 0.04560489293, | |
| "max": 0.130923086, | |
| "count": 68 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.006816111970692873, | |
| "min": 0.006301645189523697, | |
| "max": 0.37930354475975037, | |
| "count": 68 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.0954255685210228, | |
| "min": 0.08822303265333176, | |
| "max": 2.6551249027252197, | |
| "count": 68 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 210.9794520547945, | |
| "min": 210.9794520547945, | |
| "max": 999.0, | |
| "count": 68 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 30803.0, | |
| "min": 15984.0, | |
| "max": 33855.0, | |
| "count": 68 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.7753150575883585, | |
| "min": -1.0000000521540642, | |
| "max": 1.7753150575883585, | |
| "count": 68 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 259.19599840790033, | |
| "min": -30.0198016166687, | |
| "max": 259.19599840790033, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.7753150575883585, | |
| "min": -1.0000000521540642, | |
| "max": 1.7753150575883585, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 259.19599840790033, | |
| "min": -30.0198016166687, | |
| "max": 259.19599840790033, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.014890777429347309, | |
| "min": 0.014890777429347309, | |
| "max": 7.675238830037415, | |
| "count": 68 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 2.174053504684707, | |
| "min": 1.8585745868622325, | |
| "max": 122.80382128059864, | |
| "count": 68 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 68 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 68 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1780644297", | |
| "python_version": "3.10.11 (main, May 16 2023, 00:28:57) [GCC 11.2.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1780649675" | |
| }, | |
| "total": 5377.578943773, | |
| "count": 1, | |
| "self": 0.4792717000000266, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.023006346000101985, | |
| "count": 1, | |
| "self": 0.023006346000101985 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 5377.076665727, | |
| "count": 1, | |
| "self": 3.365448631909203, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.148337442999946, | |
| "count": 1, | |
| "self": 2.148337442999946 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 5371.561277016091, | |
| "count": 131610, | |
| "self": 3.4163277140623904, | |
| "children": { | |
| "env_step": { | |
| "total": 3978.422131251942, | |
| "count": 131610, | |
| "self": 3630.5817540216904, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 345.8151303160721, | |
| "count": 131610, | |
| "self": 10.368254946965408, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 335.4468753691067, | |
| "count": 127653, | |
| "self": 335.4468753691067 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 2.0252469141794336, | |
| "count": 131609, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 5364.83384016797, | |
| "count": 131609, | |
| "is_parallel": true, | |
| "self": 2002.134551953911, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0017734300001848169, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0006051519999346056, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0011682780002502113, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0011682780002502113 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.05205409800009875, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005910200002290367, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.00045941899998069857, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00045941899998069857 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.04929057599997577, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.04929057599997577 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0017130829999132402, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00037201199938863283, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013410710005246074, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013410710005246074 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 3362.6992882140594, | |
| "count": 131608, | |
| "is_parallel": true, | |
| "self": 72.65770129504835, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 49.17720659810061, | |
| "count": 131608, | |
| "is_parallel": true, | |
| "self": 49.17720659810061 | |
| }, | |
| "communicator.exchange": { | |
| "total": 3006.3144070979442, | |
| "count": 131608, | |
| "is_parallel": true, | |
| "self": 3006.3144070979442 | |
| }, | |
| "steps_from_proto": { | |
| "total": 234.54997322296617, | |
| "count": 131608, | |
| "is_parallel": true, | |
| "self": 49.87128470466223, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 184.67868851830394, | |
| "count": 1052864, | |
| "is_parallel": true, | |
| "self": 184.67868851830394 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 1389.7228180500865, | |
| "count": 131609, | |
| "self": 6.597880311056542, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 268.3150561860273, | |
| "count": 131609, | |
| "self": 267.9117552450273, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.4033009409999977, | |
| "count": 4, | |
| "self": 0.4033009409999977 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 1114.8098815530027, | |
| "count": 939, | |
| "self": 611.7738963660115, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 503.0359851869912, | |
| "count": 46521, | |
| "self": 503.0359851869912 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.3549997674999759e-06, | |
| "count": 1, | |
| "self": 1.3549997674999759e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.0016012810001484468, | |
| "count": 1, | |
| "self": 2.6049000553030055e-05, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.0015752319995954167, | |
| "count": 1, | |
| "self": 0.0015752319995954167 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |