Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use Alopezcordero/ppo-PyramidsRND with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use Alopezcordero/ppo-PyramidsRND with ml-agents:
mlagents-load-from-hf --repo-id="Alopezcordero/ppo-PyramidsRND" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.15038228034973145, | |
| "min": 0.15038228034973145, | |
| "max": 1.3743131160736084, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 4528.31103515625, | |
| "min": 4528.31103515625, | |
| "max": 41691.1640625, | |
| "count": 100 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 2999974.0, | |
| "min": 29952.0, | |
| "max": 2999974.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 2999974.0, | |
| "min": 29952.0, | |
| "max": 2999974.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.7555682063102722, | |
| "min": -0.15475746989250183, | |
| "max": 0.859150767326355, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 225.91488647460938, | |
| "min": -36.677520751953125, | |
| "max": 259.4635314941406, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.00763231934979558, | |
| "min": -0.0070030526258051395, | |
| "max": 0.3295813798904419, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 2.2820634841918945, | |
| "min": -1.9082553386688232, | |
| "max": 79.75869750976562, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06996480564495348, | |
| "min": 0.0635249337354034, | |
| "max": 0.07332698063969256, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9795072790293489, | |
| "min": 0.5097892963785434, | |
| "max": 1.0714507637797699, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.012787160794167512, | |
| "min": 0.0011003911749085144, | |
| "max": 0.017103391827688375, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.17902025111834516, | |
| "min": 0.0154054764487192, | |
| "max": 0.24467762637205479, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 1.5081637830261894e-06, | |
| "min": 1.5081637830261894e-06, | |
| "max": 0.00029838354339596195, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 2.111429296236665e-05, | |
| "min": 2.111429296236665e-05, | |
| "max": 0.004052817149060966, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10050268809523812, | |
| "min": 0.10050268809523812, | |
| "max": 0.19946118095238097, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4070376333333336, | |
| "min": 1.3962282666666668, | |
| "max": 2.7674522333333336, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 6.0218540714285675e-05, | |
| "min": 6.0218540714285675e-05, | |
| "max": 0.009946171977142856, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.0008430595699999994, | |
| "min": 0.0008430595699999994, | |
| "max": 0.13509880943, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.005964450538158417, | |
| "min": 0.005964450538158417, | |
| "max": 0.5031009316444397, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.08350230753421783, | |
| "min": 0.08350230753421783, | |
| "max": 3.5217065811157227, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 215.03546099290782, | |
| "min": 215.03546099290782, | |
| "max": 999.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 30320.0, | |
| "min": 15984.0, | |
| "max": 33050.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.7423957307816398, | |
| "min": -1.0000000521540642, | |
| "max": 1.7862137327439913, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 245.6777980402112, | |
| "min": -27.638801611959934, | |
| "max": 245.6777980402112, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.7423957307816398, | |
| "min": -1.0000000521540642, | |
| "max": 1.7862137327439913, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 245.6777980402112, | |
| "min": -27.638801611959934, | |
| "max": 245.6777980402112, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.013373334838633908, | |
| "min": 0.013373334838633908, | |
| "max": 10.02461700886488, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 1.885640212247381, | |
| "min": 1.8233160735690035, | |
| "max": 160.39387214183807, | |
| "count": 100 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 100 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1780650669", | |
| "python_version": "3.8.20 (default, Oct 3 2024, 15:24:27) \n[GCC 11.2.0]", | |
| "command_line_arguments": "/home/alejandrolopezcordero01/miniconda3/envs/snowball-rl/bin/mlagents-learn ./ppo-huggyDog/ml-agents/config/ppo/PyramidsRND.yaml --env=./snowball-env/training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids training --train --no-graphics --force --torch-device cpu", | |
| "mlagents_version": "0.30.0", | |
| "mlagents_envs_version": "0.30.0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "1.8.1+cu102", | |
| "numpy_version": "1.21.2", | |
| "end_time_seconds": "1780654731" | |
| }, | |
| "total": 4061.50532707572, | |
| "count": 1, | |
| "self": 0.4778522551059723, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.012601539492607117, | |
| "count": 1, | |
| "self": 0.012601539492607117 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 4061.0148732811213, | |
| "count": 1, | |
| "self": 2.7678375989198685, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 1.6024729013442993, | |
| "count": 1, | |
| "self": 1.6024729013442993 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 4056.372522071004, | |
| "count": 195186, | |
| "self": 2.6755558401346207, | |
| "children": { | |
| "env_step": { | |
| "total": 2498.7542427629232, | |
| "count": 195186, | |
| "self": 2318.9857820123434, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 178.02622325718403, | |
| "count": 195186, | |
| "self": 9.864511951804161, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 168.16171130537987, | |
| "count": 187549, | |
| "self": 168.16171130537987 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 1.7422374933958054, | |
| "count": 195186, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 4056.2051654458046, | |
| "count": 195186, | |
| "is_parallel": true, | |
| "self": 1933.2795535624027, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0013557225465774536, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00038802623748779297, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0009676963090896606, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0009676963090896606 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.037779852747917175, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0002923905849456787, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.00023037195205688477, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00023037195205688477 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.036429911851882935, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.036429911851882935 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0008271783590316772, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00019437074661254883, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0006328076124191284, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0006328076124191284 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 2122.925611883402, | |
| "count": 195185, | |
| "is_parallel": true, | |
| "self": 49.46420207619667, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 35.146693751215935, | |
| "count": 195185, | |
| "is_parallel": true, | |
| "self": 35.146693751215935 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1901.2837611883879, | |
| "count": 195185, | |
| "is_parallel": true, | |
| "self": 1901.2837611883879 | |
| }, | |
| "steps_from_proto": { | |
| "total": 137.0309548676014, | |
| "count": 195185, | |
| "is_parallel": true, | |
| "self": 30.470472112298012, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 106.56048275530338, | |
| "count": 1561480, | |
| "is_parallel": true, | |
| "self": 106.56048275530338 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 1554.942723467946, | |
| "count": 195186, | |
| "self": 4.506740972399712, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 226.98552429676056, | |
| "count": 195186, | |
| "self": 225.42906093597412, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 1.556463360786438, | |
| "count": 6, | |
| "self": 1.556463360786438 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 1323.4504581987858, | |
| "count": 1404, | |
| "self": 758.6627835333347, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 564.787674665451, | |
| "count": 68355, | |
| "self": 564.787674665451 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 8.791685104370117e-07, | |
| "count": 1, | |
| "self": 8.791685104370117e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.27203983068466187, | |
| "count": 1, | |
| "self": 0.03875349462032318, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.23328633606433868, | |
| "count": 1, | |
| "self": 0.23328633606433868 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |