Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use Adi-AI-2005/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use Adi-AI-2005/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="Adi-AI-2005/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.8218531012535095, | |
| "min": 0.8218531012535095, | |
| "max": 1.4017139673233032, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 24787.08984375, | |
| "min": 22427.423828125, | |
| "max": 38343.68359375, | |
| "count": 16 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 479892.0, | |
| "min": 29983.0, | |
| "max": 479892.0, | |
| "count": 16 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 479892.0, | |
| "min": 29983.0, | |
| "max": 479892.0, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.16719870269298553, | |
| "min": -0.09453507512807846, | |
| "max": 0.18006311357021332, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 41.79967498779297, | |
| "min": -22.499347686767578, | |
| "max": 45.19584274291992, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.04031394422054291, | |
| "min": 0.008605342358350754, | |
| "max": 0.4116668403148651, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 10.078486442565918, | |
| "min": 2.1169142723083496, | |
| "max": 66.13185119628906, | |
| "count": 16 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.07071494894710777, | |
| "min": 0.06409087616463414, | |
| "max": 0.07432501029365489, | |
| "count": 16 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9900092852595088, | |
| "min": 0.22297503088096465, | |
| "max": 1.0283716070727107, | |
| "count": 16 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.01113278306284984, | |
| "min": 0.0015895596431628114, | |
| "max": 0.01113278306284984, | |
| "count": 16 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.15585896287989776, | |
| "min": 0.008619954277795236, | |
| "max": 0.15585896287989776, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 2.0581250282471426e-05, | |
| "min": 2.0581250282471426e-05, | |
| "max": 0.0002865078044974, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.00028813750395459994, | |
| "min": 0.00028813750395459994, | |
| "max": 0.0028506831497724003, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10686038571428572, | |
| "min": 0.10686038571428572, | |
| "max": 0.1955026, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4960454, | |
| "min": 0.5865078, | |
| "max": 2.3381796, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.0006953525328571428, | |
| "min": 0.0006953525328571428, | |
| "max": 0.009550709740000001, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.00973493546, | |
| "min": 0.00973493546, | |
| "max": 0.09505773724000002, | |
| "count": 16 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.016086162999272346, | |
| "min": 0.015609530732035637, | |
| "max": 0.23164601624011993, | |
| "count": 16 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.22520627081394196, | |
| "min": 0.21853342652320862, | |
| "max": 1.1311066150665283, | |
| "count": 16 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 658.0909090909091, | |
| "min": 656.1777777777778, | |
| "max": 999.0, | |
| "count": 16 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 28956.0, | |
| "min": 15984.0, | |
| "max": 32536.0, | |
| "count": 16 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 0.7508090504191138, | |
| "min": -1.0000000521540642, | |
| "max": 0.7508090504191138, | |
| "count": 16 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 33.03559821844101, | |
| "min": -26.086401507258415, | |
| "max": 33.03559821844101, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 0.7508090504191138, | |
| "min": -1.0000000521540642, | |
| "max": 0.7508090504191138, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 33.03559821844101, | |
| "min": -26.086401507258415, | |
| "max": 33.03559821844101, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.1074948328314349, | |
| "min": 0.10620525570638063, | |
| "max": 2.8118732445515118, | |
| "count": 16 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 4.729772644583136, | |
| "min": 4.729772644583136, | |
| "max": 36.554352179169655, | |
| "count": 16 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 16 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 16 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1736783484", | |
| "python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics --resume", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.5.1+cu121", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1736784508" | |
| }, | |
| "total": 1024.626133001, | |
| "count": 1, | |
| "self": 0.4771244139999453, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.05486030099996242, | |
| "count": 1, | |
| "self": 0.05486030099996242 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 1024.094148286, | |
| "count": 1, | |
| "self": 0.6168152419968465, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.1712185540000064, | |
| "count": 1, | |
| "self": 2.1712185540000064 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 1021.2239919380033, | |
| "count": 30786, | |
| "self": 0.6336862899715925, | |
| "children": { | |
| "env_step": { | |
| "total": 695.8379171090241, | |
| "count": 30786, | |
| "self": 625.2730052410059, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 70.18478575300469, | |
| "count": 30786, | |
| "self": 2.1632423169855883, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 68.0215434360191, | |
| "count": 30436, | |
| "self": 68.0215434360191 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.3801261150135815, | |
| "count": 30786, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 1021.860913875991, | |
| "count": 30786, | |
| "is_parallel": true, | |
| "self": 451.09090727198486, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.00209486099993228, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0006973370000196155, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013975239999126643, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013975239999126643 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.0452558769998177, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005682189998879039, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0004359550000572199, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004359550000572199 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.04256188099998326, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.04256188099998326 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0016898219998893182, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0003678539997054031, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013219680001839151, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013219680001839151 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 570.7700066040061, | |
| "count": 30785, | |
| "is_parallel": true, | |
| "self": 15.56219973903876, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 11.16187519201435, | |
| "count": 30785, | |
| "is_parallel": true, | |
| "self": 11.16187519201435 | |
| }, | |
| "communicator.exchange": { | |
| "total": 497.36558166495774, | |
| "count": 30785, | |
| "is_parallel": true, | |
| "self": 497.36558166495774 | |
| }, | |
| "steps_from_proto": { | |
| "total": 46.68035000799523, | |
| "count": 30785, | |
| "is_parallel": true, | |
| "self": 9.347248153031842, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 37.33310185496339, | |
| "count": 246280, | |
| "is_parallel": true, | |
| "self": 37.33310185496339 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 324.7523885390076, | |
| "count": 30786, | |
| "self": 1.1181802269559284, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 62.56286166304949, | |
| "count": 30786, | |
| "self": 62.44559191104986, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.11726975199962908, | |
| "count": 1, | |
| "self": 0.11726975199962908 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 261.0713466490022, | |
| "count": 210, | |
| "self": 147.64122675499607, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 113.43011989400611, | |
| "count": 11070, | |
| "self": 113.43011989400611 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 9.169998520519584e-07, | |
| "count": 1, | |
| "self": 9.169998520519584e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.08212163500002134, | |
| "count": 1, | |
| "self": 0.0020013969997307868, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.08012023800029056, | |
| "count": 1, | |
| "self": 0.08012023800029056 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |