Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use Ilyaminch/ppo-Pyramids_Training with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use Ilyaminch/ppo-Pyramids_Training with ml-agents:
mlagents-load-from-hf --repo-id="Ilyaminch/ppo-Pyramids_Training" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 1.0188018083572388, | |
| "min": 1.0188018083572388, | |
| "max": 1.4814026355743408, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 30515.15234375, | |
| "min": 30515.15234375, | |
| "max": 44939.83203125, | |
| "count": 3 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 89897.0, | |
| "min": 29975.0, | |
| "max": 89897.0, | |
| "count": 3 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 89897.0, | |
| "min": 29975.0, | |
| "max": 89897.0, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": -0.07936542481184006, | |
| "min": -0.17528338730335236, | |
| "max": -0.07936542481184006, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": -19.12706756591797, | |
| "min": -41.717445373535156, | |
| "max": -19.12706756591797, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.1228138655424118, | |
| "min": 0.1228138655424118, | |
| "max": 0.2524747848510742, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 29.598140716552734, | |
| "min": 29.598140716552734, | |
| "max": 60.0890007019043, | |
| "count": 3 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.0712435307308123, | |
| "min": 0.06850825346092108, | |
| "max": 0.07249792377210806, | |
| "count": 3 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9261658995005598, | |
| "min": 0.5074854664047564, | |
| "max": 0.9261658995005598, | |
| "count": 3 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.0017534098856690687, | |
| "min": 0.0017534098856690687, | |
| "max": 0.004709439305010349, | |
| "count": 3 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.022794328513697892, | |
| "min": 0.022794328513697892, | |
| "max": 0.0452753760948104, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 7.572422860476921e-05, | |
| "min": 7.572422860476921e-05, | |
| "max": 0.0002515063018788571, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.0009844149718619998, | |
| "min": 0.0009844149718619998, | |
| "max": 0.0019674575441810003, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.12524138461538462, | |
| "min": 0.12524138461538462, | |
| "max": 0.1838354285714286, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.628138, | |
| "min": 1.2868480000000002, | |
| "max": 1.855819, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.002531614323076923, | |
| "min": 0.002531614323076923, | |
| "max": 0.008385159314285713, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.0329109862, | |
| "min": 0.0329109862, | |
| "max": 0.0656363181, | |
| "count": 3 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.08811005204916, | |
| "min": 0.08811005204916, | |
| "max": 0.31133005023002625, | |
| "count": 3 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 1.1454306840896606, | |
| "min": 1.1454306840896606, | |
| "max": 2.1793103218078613, | |
| "count": 3 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 991.625, | |
| "min": 958.9411764705883, | |
| "max": 991.625, | |
| "count": 3 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 31732.0, | |
| "min": 16774.0, | |
| "max": 32604.0, | |
| "count": 3 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": -0.8674750525970012, | |
| "min": -0.8700000485953163, | |
| "max": -0.7245176971396979, | |
| "count": 3 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": -27.75920168310404, | |
| "min": -27.75920168310404, | |
| "max": -14.790000826120377, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": -0.8674750525970012, | |
| "min": -0.8700000485953163, | |
| "max": -0.7245176971396979, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": -27.75920168310404, | |
| "min": -27.75920168310404, | |
| "max": -14.790000826120377, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.9801330148475245, | |
| "min": 0.9801330148475245, | |
| "max": 6.044643790844609, | |
| "count": 3 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 31.364256475120783, | |
| "min": 31.364256475120783, | |
| "max": 102.75894444435835, | |
| "count": 3 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 3 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 3 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1780344344", | |
| "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1780344559" | |
| }, | |
| "total": 214.932443445, | |
| "count": 1, | |
| "self": 0.4808614570001737, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.022835621000012907, | |
| "count": 1, | |
| "self": 0.022835621000012907 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 214.4287463669998, | |
| "count": 1, | |
| "self": 0.13084990305560495, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.193858179000017, | |
| "count": 1, | |
| "self": 2.193858179000017 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 211.99608948794412, | |
| "count": 6316, | |
| "self": 0.13376956795673323, | |
| "children": { | |
| "env_step": { | |
| "total": 145.80193447496004, | |
| "count": 6316, | |
| "self": 130.26702204399498, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 15.456462868977724, | |
| "count": 6316, | |
| "self": 0.47049408095972467, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 14.985968788017999, | |
| "count": 6303, | |
| "self": 14.985968788017999 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.07844956198732689, | |
| "count": 6316, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 213.7848134499509, | |
| "count": 6316, | |
| "is_parallel": true, | |
| "self": 95.09129380295326, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.002101048999975319, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0006965219999983674, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0014045269999769516, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0014045269999769516 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.052567302999705134, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005690020007023122, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0004995509998479974, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004995509998479974 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.04963096299979952, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.04963096299979952 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0018677869993553031, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00037687900112359785, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0014909079982317053, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0014909079982317053 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 118.69351964699763, | |
| "count": 6315, | |
| "is_parallel": true, | |
| "self": 3.414096788020288, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 2.3695646429923727, | |
| "count": 6315, | |
| "is_parallel": true, | |
| "self": 2.3695646429923727 | |
| }, | |
| "communicator.exchange": { | |
| "total": 101.83674765195155, | |
| "count": 6315, | |
| "is_parallel": true, | |
| "self": 101.83674765195155 | |
| }, | |
| "steps_from_proto": { | |
| "total": 11.073110564033414, | |
| "count": 6315, | |
| "is_parallel": true, | |
| "self": 2.305956688972401, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 8.767153875061013, | |
| "count": 50520, | |
| "is_parallel": true, | |
| "self": 8.767153875061013 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 66.06038544502735, | |
| "count": 6316, | |
| "self": 0.17513866199806216, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 12.307471895027447, | |
| "count": 6316, | |
| "self": 12.307471895027447 | |
| }, | |
| "_update_policy": { | |
| "total": 53.57777488800184, | |
| "count": 36, | |
| "self": 29.78163769099592, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 23.79613719700592, | |
| "count": 2274, | |
| "self": 23.79613719700592 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 9.880004654405639e-07, | |
| "count": 1, | |
| "self": 9.880004654405639e-07 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.10794780899959733, | |
| "count": 1, | |
| "self": 0.0008784579995335662, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.10706935100006376, | |
| "count": 1, | |
| "self": 0.10706935100006376 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |