Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use afsee/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use afsee/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="afsee/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.14317356050014496, | |
| "min": 0.1338392198085785, | |
| "max": 1.470719814300537, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 4249.39111328125, | |
| "min": 4002.327880859375, | |
| "max": 44615.7578125, | |
| "count": 100 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 2999885.0, | |
| "min": 29952.0, | |
| "max": 2999885.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 2999885.0, | |
| "min": 29952.0, | |
| "max": 2999885.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.8388898372650146, | |
| "min": -0.1523117870092392, | |
| "max": 0.908114492893219, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 260.05584716796875, | |
| "min": -36.09789276123047, | |
| "max": 283.33172607421875, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.001526888576336205, | |
| "min": -0.009870721027255058, | |
| "max": 0.379002183675766, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 0.47333547472953796, | |
| "min": -2.7045774459838867, | |
| "max": 89.82351684570312, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06949581381576579, | |
| "min": 0.06415650463757283, | |
| "max": 0.07415783993657861, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9729413934207211, | |
| "min": 0.4880005437879082, | |
| "max": 1.082951537411039, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.015120665382017336, | |
| "min": 0.0006195215525306199, | |
| "max": 0.016487620083814537, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.2116893153482427, | |
| "min": 0.00805378018289806, | |
| "max": 0.24731430125721804, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 1.5815780442690443e-06, | |
| "min": 1.5815780442690443e-06, | |
| "max": 0.00029838354339596195, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 2.2142092619766622e-05, | |
| "min": 2.2142092619766622e-05, | |
| "max": 0.004072375242541633, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10052715952380954, | |
| "min": 0.10052715952380954, | |
| "max": 0.19946118095238097, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.4073802333333336, | |
| "min": 1.3962282666666668, | |
| "max": 2.857458366666667, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 6.266323642857133e-05, | |
| "min": 6.266323642857133e-05, | |
| "max": 0.009946171977142856, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.0008772853099999988, | |
| "min": 0.0008772853099999988, | |
| "max": 0.13576009083, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.005743414629250765, | |
| "min": 0.005513820331543684, | |
| "max": 0.4066004455089569, | |
| "count": 100 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.08040780574083328, | |
| "min": 0.077193483710289, | |
| "max": 2.846203088760376, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 206.86805555555554, | |
| "min": 197.2948717948718, | |
| "max": 999.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 29789.0, | |
| "min": 15984.0, | |
| "max": 34550.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.7912689579971905, | |
| "min": -1.0000000521540642, | |
| "max": 1.8027051178117592, | |
| "count": 100 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 259.73399890959263, | |
| "min": -30.725001737475395, | |
| "max": 281.22199837863445, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.7912689579971905, | |
| "min": -1.0000000521540642, | |
| "max": 1.8027051178117592, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 259.73399890959263, | |
| "min": -30.725001737475395, | |
| "max": 281.22199837863445, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.012507152822594247, | |
| "min": 0.012256550670125372, | |
| "max": 8.662854745052755, | |
| "count": 100 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 1.8135371592761658, | |
| "min": 1.715917093817552, | |
| "max": 138.60567592084408, | |
| "count": 100 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 100 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 100 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1780611098", | |
| "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", | |
| "command_line_arguments": "/usr/local/envs/mlagents_env/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1780619353" | |
| }, | |
| "total": 8254.582021847, | |
| "count": 1, | |
| "self": 0.4970967469998868, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.021649395000054028, | |
| "count": 1, | |
| "self": 0.021649395000054028 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 8254.063275705, | |
| "count": 1, | |
| "self": 4.650923130153387, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 3.5289828820000366, | |
| "count": 1, | |
| "self": 3.5289828820000366 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 8245.804056115847, | |
| "count": 195232, | |
| "self": 4.842498358870216, | |
| "children": { | |
| "env_step": { | |
| "total": 6217.403719845916, | |
| "count": 195232, | |
| "self": 5738.55997417515, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 476.0379526200725, | |
| "count": 195232, | |
| "self": 14.673274251111593, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 461.3646783689609, | |
| "count": 187557, | |
| "self": 461.3646783689609 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 2.8057930506939783, | |
| "count": 195232, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 8236.327675038156, | |
| "count": 195232, | |
| "is_parallel": true, | |
| "self": 2876.6212362241295, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.005106592999936765, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0038550609999674634, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.001251531999969302, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.001251531999969302 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.05299904200001038, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.000574484000026132, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0005894319999697473, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005894319999697473 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.05006229100001747, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.05006229100001747 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0017728349999970305, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0003814200000533674, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0013914149999436631, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0013914149999436631 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 5359.706438814026, | |
| "count": 195231, | |
| "is_parallel": true, | |
| "self": 107.3555642290903, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 73.72086274595688, | |
| "count": 195231, | |
| "is_parallel": true, | |
| "self": 73.72086274595688 | |
| }, | |
| "communicator.exchange": { | |
| "total": 4830.2567602789995, | |
| "count": 195231, | |
| "is_parallel": true, | |
| "self": 4830.2567602789995 | |
| }, | |
| "steps_from_proto": { | |
| "total": 348.37325155997905, | |
| "count": 195231, | |
| "is_parallel": true, | |
| "self": 73.36704667984463, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 275.0062048801344, | |
| "count": 1561848, | |
| "is_parallel": true, | |
| "self": 275.0062048801344 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 2023.5578379110598, | |
| "count": 195232, | |
| "self": 9.044068861933056, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 397.4436617081476, | |
| "count": 195232, | |
| "self": 396.8963702281476, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.5472914800000126, | |
| "count": 6, | |
| "self": 0.5472914800000126 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 1617.0701073409791, | |
| "count": 1399, | |
| "self": 890.4759862120949, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 726.5941211288842, | |
| "count": 68382, | |
| "self": 726.5941211288842 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 9.479990694671869e-07, | |
| "count": 1, | |
| "self": 9.479990694671869e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.07931262900092406, | |
| "count": 1, | |
| "self": 0.0011163090021000244, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.07819631999882404, | |
| "count": 1, | |
| "self": 0.07819631999882404 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |