Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use palanish12/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use palanish12/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="palanish12/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.27607762813568115, | |
| "min": 0.24586544930934906, | |
| "max": 1.4860436916351318, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 5495.048828125, | |
| "min": 4933.04443359375, | |
| "max": 29863.533203125, | |
| "count": 50 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 999890.0, | |
| "min": 19968.0, | |
| "max": 999890.0, | |
| "count": 50 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 999890.0, | |
| "min": 19968.0, | |
| "max": 999890.0, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.6114235520362854, | |
| "min": -0.07502932846546173, | |
| "max": 0.6669875383377075, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 113.11335754394531, | |
| "min": -12.004693031311035, | |
| "max": 124.05967712402344, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.039064064621925354, | |
| "min": 0.0345841683447361, | |
| "max": 1.9643781185150146, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 7.226851940155029, | |
| "min": 6.432655334472656, | |
| "max": 314.3005065917969, | |
| "count": 50 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06856900762667303, | |
| "min": 0.06283093447348585, | |
| "max": 0.07372255274638248, | |
| "count": 50 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.6856900762667304, | |
| "min": 0.2948902109855299, | |
| "max": 0.7142337102388197, | |
| "count": 50 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.015695929414405507, | |
| "min": 0.0009867582092490793, | |
| "max": 0.2095112097462982, | |
| "count": 50 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.15695929414405507, | |
| "min": 0.008880823883241714, | |
| "max": 0.8380448389851928, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 3.0703589765800004e-06, | |
| "min": 3.0703589765800004e-06, | |
| "max": 0.0002969568010144, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 3.0703589765800006e-05, | |
| "min": 3.0703589765800006e-05, | |
| "max": 0.0025507324497558996, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.10102342, | |
| "min": 0.10102342, | |
| "max": 0.19898559999999998, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 1.0102342, | |
| "min": 0.7959423999999999, | |
| "max": 1.8502441000000003, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.00011223965800000003, | |
| "min": 0.00011223965800000003, | |
| "max": 0.00989866144, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.0011223965800000004, | |
| "min": 0.0011223965800000004, | |
| "max": 0.08503938559, | |
| "count": 50 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.05083625391125679, | |
| "min": 0.04953589662909508, | |
| "max": 3.5884857177734375, | |
| "count": 50 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.5083625316619873, | |
| "min": 0.445823073387146, | |
| "max": 14.35394287109375, | |
| "count": 50 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 312.23809523809524, | |
| "min": 261.9861111111111, | |
| "max": 999.0, | |
| "count": 50 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 19671.0, | |
| "min": 15984.0, | |
| "max": 28879.0, | |
| "count": 50 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.5963406036607921, | |
| "min": -1.0000000521540642, | |
| "max": 1.7192698371788813, | |
| "count": 50 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 102.1657986342907, | |
| "min": -20.905201599001884, | |
| "max": 123.13639903068542, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.5963406036607921, | |
| "min": -1.0000000521540642, | |
| "max": 1.7192698371788813, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 102.1657986342907, | |
| "min": -20.905201599001884, | |
| "max": 123.13639903068542, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.16292411482936586, | |
| "min": 0.1494339367491193, | |
| "max": 56.01273512840271, | |
| "count": 50 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 10.427143349079415, | |
| "min": 9.75880633178167, | |
| "max": 896.2037620544434, | |
| "count": 50 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 50 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 50 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1780402166", | |
| "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn /content/ml-agents/config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids_Training --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1780404736" | |
| }, | |
| "total": 2569.8803703600006, | |
| "count": 1, | |
| "self": 0.47836302100040484, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.02345482400050969, | |
| "count": 1, | |
| "self": 0.02345482400050969 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 2569.3785525149997, | |
| "count": 1, | |
| "self": 1.5023615441386937, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.1729307380001046, | |
| "count": 1, | |
| "self": 2.1729307380001046 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 2565.620878907862, | |
| "count": 64143, | |
| "self": 1.5137652785351747, | |
| "children": { | |
| "env_step": { | |
| "total": 1897.8961897731388, | |
| "count": 64143, | |
| "self": 1734.042907747803, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 163.0017928205407, | |
| "count": 64143, | |
| "self": 4.970953029694101, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 158.03083979084658, | |
| "count": 62560, | |
| "self": 158.03083979084658 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.8514892047951435, | |
| "count": 64143, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 2563.6564446219427, | |
| "count": 64143, | |
| "is_parallel": true, | |
| "self": 955.885842605805, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.002260352000121202, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0007410460011669784, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0015193059989542235, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0015193059989542235 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.10752102400056174, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005880640001123538, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0005247220005912823, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005247220005912823 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.10360907199992653, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.10360907199992653 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0027991659999315743, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.00040797600013320334, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.002391189999798371, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.002391189999798371 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1607.7706020161377, | |
| "count": 64142, | |
| "is_parallel": true, | |
| "self": 35.83594821573479, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 25.398353048119134, | |
| "count": 64142, | |
| "is_parallel": true, | |
| "self": 25.398353048119134 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1428.15167721311, | |
| "count": 64142, | |
| "is_parallel": true, | |
| "self": 1428.15167721311 | |
| }, | |
| "steps_from_proto": { | |
| "total": 118.38462353917384, | |
| "count": 64142, | |
| "is_parallel": true, | |
| "self": 24.316188953881465, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 94.06843458529238, | |
| "count": 513136, | |
| "is_parallel": true, | |
| "self": 94.06843458529238 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 666.210923856188, | |
| "count": 64143, | |
| "self": 2.8341005002876045, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 131.36757172290345, | |
| "count": 64143, | |
| "self": 131.1819286509035, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.185643071999948, | |
| "count": 2, | |
| "self": 0.185643071999948 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 532.0092516329969, | |
| "count": 461, | |
| "self": 286.6405430331115, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 245.3687085998854, | |
| "count": 22785, | |
| "self": 245.3687085998854 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.0349995136493817e-06, | |
| "count": 1, | |
| "self": 1.0349995136493817e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.08238028999949165, | |
| "count": 1, | |
| "self": 0.00112524799988023, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.08125504199961142, | |
| "count": 1, | |
| "self": 0.08125504199961142 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |