Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use alexillovsky/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use alexillovsky/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="alexillovsky/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.17382673919200897, | |
| "min": 0.17382673919200897, | |
| "max": 1.4680708646774292, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 5245.3955078125, | |
| "min": 5245.3955078125, | |
| "max": 44535.3984375, | |
| "count": 67 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 2009965.0, | |
| "min": 29952.0, | |
| "max": 2009965.0, | |
| "count": 67 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 2009965.0, | |
| "min": 29952.0, | |
| "max": 2009965.0, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.7971019744873047, | |
| "min": -0.060563962906599045, | |
| "max": 0.838578462600708, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 238.33349609375, | |
| "min": -14.595914840698242, | |
| "max": 256.6050109863281, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.019519036635756493, | |
| "min": -0.028085948899388313, | |
| "max": 0.44714999198913574, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 5.8361921310424805, | |
| "min": -8.257268905639648, | |
| "max": 105.97454833984375, | |
| "count": 67 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06736226267466995, | |
| "min": 0.06419471886363767, | |
| "max": 0.07418490870969392, | |
| "count": 67 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9430716774453793, | |
| "min": 0.5059105568176039, | |
| "max": 1.0986826199040673, | |
| "count": 67 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.014044906552398383, | |
| "min": 0.0006713748298714519, | |
| "max": 0.01684946114500151, | |
| "count": 67 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.19662869173357736, | |
| "min": 0.007385123128585971, | |
| "max": 0.24235258578019234, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 0.0001004180736701881, | |
| "min": 0.0001004180736701881, | |
| "max": 0.00029838354339596195, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.0014058530313826333, | |
| "min": 0.0014058530313826333, | |
| "max": 0.004162595112468333, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.13347266904761904, | |
| "min": 0.13347266904761904, | |
| "max": 0.19946118095238097, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.8686173666666666, | |
| "min": 1.3962282666666668, | |
| "max": 2.887531666666667, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.0033539196378571424, | |
| "min": 0.0033539196378571424, | |
| "max": 0.009946171977142856, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.046954874929999994, | |
| "min": 0.046954874929999994, | |
| "max": 0.1387644135, | |
| "count": 67 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.009193127043545246, | |
| "min": 0.008977246470749378, | |
| "max": 0.5175396800041199, | |
| "count": 67 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.128703773021698, | |
| "min": 0.12634122371673584, | |
| "max": 3.6227777004241943, | |
| "count": 67 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 231.1640625, | |
| "min": 215.0359712230216, | |
| "max": 999.0, | |
| "count": 67 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 29589.0, | |
| "min": 15984.0, | |
| "max": 33752.0, | |
| "count": 67 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.7532062327954918, | |
| "min": -1.0000000521540642, | |
| "max": 1.7849640184812408, | |
| "count": 67 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 224.41039779782295, | |
| "min": -30.99840161204338, | |
| "max": 248.10999856889248, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.7532062327954918, | |
| "min": -1.0000000521540642, | |
| "max": 1.7849640184812408, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 224.41039779782295, | |
| "min": -30.99840161204338, | |
| "max": 248.10999856889248, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.0218517561149838, | |
| "min": 0.02074999827430952, | |
| "max": 11.254901316016912, | |
| "count": 67 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 2.7970247827179264, | |
| "min": 2.6799589603906497, | |
| "max": 180.0784210562706, | |
| "count": 67 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 67 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 67 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1776255384", | |
| "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", | |
| "command_line_arguments": "/content/ml-agents/ml-agents/mlagents/trainers/learn.py ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1776258959" | |
| }, | |
| "total": 3574.305676917, | |
| "count": 1, | |
| "self": 0.28823517199998605, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.020537009000008766, | |
| "count": 1, | |
| "self": 0.020537009000008766 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 3573.996904736, | |
| "count": 1, | |
| "self": 2.931396191967451, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.424915349999992, | |
| "count": 1, | |
| "self": 2.424915349999992 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 3568.5578496920325, | |
| "count": 131623, | |
| "self": 2.761282976023722, | |
| "children": { | |
| "env_step": { | |
| "total": 2259.007255424086, | |
| "count": 131623, | |
| "self": 1955.863011068092, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 301.4284310940611, | |
| "count": 131623, | |
| "self": 8.978728439078736, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 292.4497026549824, | |
| "count": 126698, | |
| "self": 292.4497026549824 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 1.7158132619330217, | |
| "count": 131623, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 3568.7546765180873, | |
| "count": 131623, | |
| "is_parallel": true, | |
| "self": 1800.845883619094, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.001861339000015505, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0006241529998760598, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0012371860001394452, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0012371860001394452 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.03911051700004009, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00040683100007754547, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.00030404899996483437, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00030404899996483437 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.03727141000001666, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.03727141000001666 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0011282269999810524, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00027942600002006657, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0008488009999609858, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0008488009999609858 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1767.9087928989934, | |
| "count": 131622, | |
| "is_parallel": true, | |
| "self": 42.61845364200394, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 28.616470887966216, | |
| "count": 131622, | |
| "is_parallel": true, | |
| "self": 28.616470887966216 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1574.5572109919842, | |
| "count": 131622, | |
| "is_parallel": true, | |
| "self": 1574.5572109919842 | |
| }, | |
| "steps_from_proto": { | |
| "total": 122.11665737703902, | |
| "count": 131622, | |
| "is_parallel": true, | |
| "self": 26.733273666144896, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 95.38338371089412, | |
| "count": 1052976, | |
| "is_parallel": true, | |
| "self": 95.38338371089412 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 1306.7893112919228, | |
| "count": 131623, | |
| "self": 5.7719391719961095, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 248.87537905092483, | |
| "count": 131623, | |
| "self": 248.54681763092515, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.32856141999968713, | |
| "count": 4, | |
| "self": 0.32856141999968713 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 1052.1419930690017, | |
| "count": 942, | |
| "self": 569.5711688639472, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 482.5708242050546, | |
| "count": 46133, | |
| "self": 482.5708242050546 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.5960004020598717e-06, | |
| "count": 1, | |
| "self": 1.5960004020598717e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.08274190599968279, | |
| "count": 1, | |
| "self": 0.0010111109995705192, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.08173079500011227, | |
| "count": 1, | |
| "self": 0.08173079500011227 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |