Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use commanderxa/ppo-PyramindsTraining with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use commanderxa/ppo-PyramindsTraining with ml-agents:
mlagents-load-from-hf --repo-id="commanderxa/ppo-PyramindsTraining" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.29010871052742004, | |
| "min": 0.28967219591140747, | |
| "max": 1.4201050996780396, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 8717.1865234375, | |
| "min": 8666.9921875, | |
| "max": 43080.30859375, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 989885.0, | |
| "min": 29952.0, | |
| "max": 989885.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 989885.0, | |
| "min": 29952.0, | |
| "max": 989885.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.565032958984375, | |
| "min": -0.09078627824783325, | |
| "max": 0.5994598865509033, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 153.68896484375, | |
| "min": -21.879493713378906, | |
| "max": 169.0476837158203, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.013102969154715538, | |
| "min": -0.05149155482649803, | |
| "max": 0.5854023694992065, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 3.564007520675659, | |
| "min": -13.490787506103516, | |
| "max": 138.7403564453125, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06925368377484291, | |
| "min": 0.06613300761541668, | |
| "max": 0.07506670230042573, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9695515728478008, | |
| "min": 0.5054248336606577, | |
| "max": 1.085479220827384, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.01440242283951236, | |
| "min": 0.00030848678303165965, | |
| "max": 0.01571312938981604, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.20163391975317305, | |
| "min": 0.0040103281794115755, | |
| "max": 0.21998381145742454, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 7.744361704292854e-06, | |
| "min": 7.744361704292854e-06, | |
| "max": 0.00029515063018788575, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.00010842106386009996, | |
| "min": 0.00010842106386009996, | |
| "max": 0.0034929280356906997, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10258142142857143, | |
| "min": 0.10258142142857143, | |
| "max": 0.19838354285714285, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4361399000000001, | |
| "min": 1.3886848, | |
| "max": 2.527537, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.0002678840007142856, | |
| "min": 0.0002678840007142856, | |
| "max": 0.00983851593142857, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.0037503760099999984, | |
| "min": 0.0037503760099999984, | |
| "max": 0.11644449907, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.014360090717673302, | |
| "min": 0.014204817824065685, | |
| "max": 0.5421449542045593, | |
| "count": 33 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.20104126632213593, | |
| "min": 0.19886745512485504, | |
| "max": 3.7950146198272705, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 346.54761904761904, | |
| "min": 314.2258064516129, | |
| "max": 999.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 29110.0, | |
| "min": 15984.0, | |
| "max": 33125.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.629638076715526, | |
| "min": -1.0000000521540642, | |
| "max": 1.642750516053169, | |
| "count": 33 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 136.8895984441042, | |
| "min": -30.463601663708687, | |
| "max": 152.77579799294472, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.629638076715526, | |
| "min": -1.0000000521540642, | |
| "max": 1.642750516053169, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 136.8895984441042, | |
| "min": -30.463601663708687, | |
| "max": 152.77579799294472, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.051636189127401894, | |
| "min": 0.04994475566631844, | |
| "max": 11.260127009823918, | |
| "count": 33 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 4.337439886701759, | |
| "min": 4.2647237045748625, | |
| "max": 180.1620321571827, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 33 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1782157848", | |
| "python_version": "3.10.12 (main, Mar 3 2026, 11:56:32) [GCC 11.4.0]", | |
| "command_line_arguments": "/env/py310/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training 2 --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1782160426" | |
| }, | |
| "total": 2578.7700127869994, | |
| "count": 1, | |
| "self": 0.4270502029985437, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.024317753000104858, | |
| "count": 1, | |
| "self": 0.024317753000104858 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 2578.3186448310007, | |
| "count": 1, | |
| "self": 1.3767602889247428, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.85560293799972, | |
| "count": 1, | |
| "self": 2.85560293799972 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 2574.008934637078, | |
| "count": 63875, | |
| "self": 1.3916544699195583, | |
| "children": { | |
| "env_step": { | |
| "total": 1934.8630425281608, | |
| "count": 63875, | |
| "self": 1781.4818960892462, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 152.52995557210306, | |
| "count": 63875, | |
| "self": 4.704329792834869, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 147.8256257792682, | |
| "count": 62559, | |
| "self": 147.8256257792682 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.8511908668115211, | |
| "count": 63875, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 2572.273079485204, | |
| "count": 63875, | |
| "is_parallel": true, | |
| "self": 910.071756786584, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0025745029997779056, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0006985610007177456, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.00187594199906016, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.00187594199906016 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.05431029000010312, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005840539997734595, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0004572950001602294, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004572950001602294 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.051337142000193126, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.051337142000193126 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0019317989999763086, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0003975529989475035, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.001534246001028805, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.001534246001028805 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1662.20132269862, | |
| "count": 63874, | |
| "is_parallel": true, | |
| "self": 33.787078112688505, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 24.319504085090557, | |
| "count": 63874, | |
| "is_parallel": true, | |
| "self": 24.319504085090557 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1489.7498081190497, | |
| "count": 63874, | |
| "is_parallel": true, | |
| "self": 1489.7498081190497 | |
| }, | |
| "steps_from_proto": { | |
| "total": 114.3449323817913, | |
| "count": 63874, | |
| "is_parallel": true, | |
| "self": 23.80874632185987, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 90.53618605993142, | |
| "count": 510992, | |
| "is_parallel": true, | |
| "self": 90.53618605993142 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 637.7542376389974, | |
| "count": 63875, | |
| "self": 2.6083143510059017, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 113.30397695900592, | |
| "count": 63875, | |
| "self": 113.05157624300864, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.25240071599728253, | |
| "count": 2, | |
| "self": 0.25240071599728253 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 521.8419463289856, | |
| "count": 454, | |
| "self": 278.5588109348655, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 243.2831353941201, | |
| "count": 22803, | |
| "self": 243.2831353941201 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 8.639999578008428e-07, | |
| "count": 1, | |
| "self": 8.639999578008428e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.0773461029984901, | |
| "count": 1, | |
| "self": 0.0009508419989288086, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.07639526099956129, | |
| "count": 1, | |
| "self": 0.07639526099956129 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |