Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use brk0zt/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use brk0zt/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="brk0zt/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.8429846167564392, | |
| "min": 0.8429846167564392, | |
| "max": 1.502804160118103, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 25289.5390625, | |
| "min": 25289.5390625, | |
| "max": 45589.06640625, | |
| "count": 17 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 509882.0, | |
| "min": 29952.0, | |
| "max": 509882.0, | |
| "count": 17 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 509882.0, | |
| "min": 29952.0, | |
| "max": 509882.0, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.3478340208530426, | |
| "min": -0.08725451678037643, | |
| "max": 0.35829678177833557, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 90.43684387207031, | |
| "min": -21.028339385986328, | |
| "max": 93.15716552734375, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.03225788101553917, | |
| "min": 0.018812665715813637, | |
| "max": 0.28176987171173096, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 8.387048721313477, | |
| "min": 4.834855079650879, | |
| "max": 66.77945709228516, | |
| "count": 17 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06887973945136071, | |
| "min": 0.06583450421198865, | |
| "max": 0.07254255880087727, | |
| "count": 17 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9643163523190499, | |
| "min": 0.495612762338514, | |
| "max": 1.0409129210553751, | |
| "count": 17 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.011578065592835802, | |
| "min": 0.0007586126401399092, | |
| "max": 0.011578065592835802, | |
| "count": 17 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.1620929182997012, | |
| "min": 0.010620576961958728, | |
| "max": 0.1620929182997012, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 0.00025050805221160714, | |
| "min": 0.00025050805221160714, | |
| "max": 0.00029838354339596195, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.0035071127309625, | |
| "min": 0.0020886848037717336, | |
| "max": 0.0039694597768467664, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.1835026785714286, | |
| "min": 0.1835026785714286, | |
| "max": 0.19946118095238097, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 2.5690375000000003, | |
| "min": 1.3962282666666668, | |
| "max": 2.812430733333333, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.008351917589285714, | |
| "min": 0.008351917589285714, | |
| "max": 0.009946171977142856, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.11692684625, | |
| "min": 0.06962320384, | |
| "max": 0.13232300801000002, | |
| "count": 17 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.01609208807349205, | |
| "min": 0.01609208807349205, | |
| "max": 0.37398189306259155, | |
| "count": 17 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.2252892404794693, | |
| "min": 0.2252892404794693, | |
| "max": 2.617873191833496, | |
| "count": 17 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 515.8983050847457, | |
| "min": 515.8983050847457, | |
| "max": 999.0, | |
| "count": 17 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 30438.0, | |
| "min": 15984.0, | |
| "max": 32940.0, | |
| "count": 17 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.2128575999858016, | |
| "min": -1.0000000521540642, | |
| "max": 1.2128575999858016, | |
| "count": 17 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 71.55859839916229, | |
| "min": -30.331601656973362, | |
| "max": 71.55859839916229, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.2128575999858016, | |
| "min": -1.0000000521540642, | |
| "max": 1.2128575999858016, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 71.55859839916229, | |
| "min": -30.331601656973362, | |
| "max": 71.55859839916229, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.08625079860409596, | |
| "min": 0.08625079860409596, | |
| "max": 7.604195029474795, | |
| "count": 17 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 5.088797117641661, | |
| "min": 5.088797117641661, | |
| "max": 121.66712047159672, | |
| "count": 17 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 17 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 17 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1777211945", | |
| "python_version": "3.10.12 (main, Mar 3 2026, 11:56:32) [GCC 11.4.0]", | |
| "command_line_arguments": "/content/ml-agents/ml-agents/mlagents/trainers/learn.py /content/ml-agents/config/ppo/PyramidsRND.yaml --env=/content/ml-agents/training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids_Training_1 --no-graphics", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1777213070" | |
| }, | |
| "total": 1124.9908681449997, | |
| "count": 1, | |
| "self": 0.5347418509995805, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.01991339899996092, | |
| "count": 1, | |
| "self": 0.01991339899996092 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 1124.4362128950002, | |
| "count": 1, | |
| "self": 0.7297435449704608, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.2816635670001233, | |
| "count": 1, | |
| "self": 2.2816635670001233 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 1121.41658331203, | |
| "count": 33155, | |
| "self": 0.7267180860485496, | |
| "children": { | |
| "env_step": { | |
| "total": 769.5420282249852, | |
| "count": 33155, | |
| "self": 688.3621011839036, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 80.7302132740092, | |
| "count": 33155, | |
| "self": 2.528849327090029, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 78.20136394691917, | |
| "count": 32767, | |
| "self": 78.20136394691917 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.4497137670723532, | |
| "count": 33155, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 1121.2244427579517, | |
| "count": 33155, | |
| "is_parallel": true, | |
| "self": 494.2237498019972, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0018577669998194324, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005981829995107546, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0012595840003086778, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0012595840003086778 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.05001835900020524, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.000593998000567808, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0005207499998505227, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005207499998505227 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.047078810000130034, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.047078810000130034 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0018248009996568726, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0003805699998338241, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0014442309998230485, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0014442309998230485 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 627.0006929559545, | |
| "count": 33154, | |
| "is_parallel": true, | |
| "self": 17.481418120063154, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 12.174977224905433, | |
| "count": 33154, | |
| "is_parallel": true, | |
| "self": 12.174977224905433 | |
| }, | |
| "communicator.exchange": { | |
| "total": 539.7229246840079, | |
| "count": 33154, | |
| "is_parallel": true, | |
| "self": 539.7229246840079 | |
| }, | |
| "steps_from_proto": { | |
| "total": 57.62137292697798, | |
| "count": 33154, | |
| "is_parallel": true, | |
| "self": 11.982562276967656, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 45.63881065001033, | |
| "count": 265232, | |
| "is_parallel": true, | |
| "self": 45.63881065001033 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 351.14783700099633, | |
| "count": 33155, | |
| "self": 1.3211543039465141, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 66.08043701904444, | |
| "count": 33155, | |
| "self": 65.9015423650444, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.17889465400003246, | |
| "count": 1, | |
| "self": 0.17889465400003246 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 283.7462456780054, | |
| "count": 227, | |
| "self": 158.3459020819846, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 125.40034359602078, | |
| "count": 11931, | |
| "self": 125.40034359602078 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.7249994925805368e-06, | |
| "count": 1, | |
| "self": 1.7249994925805368e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.00822074600000633, | |
| "count": 1, | |
| "self": 0.00014616600037697935, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.00807457999962935, | |
| "count": 1, | |
| "self": 0.00807457999962935 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |