{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.47231432795524597,
"min": 0.4594147205352783,
"max": 1.4196290969848633,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 14237.443359375,
"min": 13642.779296875,
"max": 43065.8671875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989905.0,
"min": 29945.0,
"max": 989905.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989905.0,
"min": 29945.0,
"max": 989905.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5294958353042603,
"min": -0.10698656737804413,
"max": 0.5612090229988098,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 142.96388244628906,
"min": -25.676776885986328,
"max": 156.31027221679688,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.08520294725894928,
"min": -0.10241486132144928,
"max": 0.3250313699245453,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -23.00479507446289,
"min": -28.266502380371094,
"max": 77.03243255615234,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.07057961595654931,
"min": 0.06527526476906696,
"max": 0.07445376670401847,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9881146233916904,
"min": 0.6573381081622225,
"max": 1.0882986997523547,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.018235645639362513,
"min": 0.0007900537469927757,
"max": 0.027781437295960217,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.2552990389510752,
"min": 0.008690591216920533,
"max": 0.38894012214344303,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.693661721192854e-06,
"min": 7.693661721192854e-06,
"max": 0.0002953488015504,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010771126409669995,
"min": 0.00010771126409669995,
"max": 0.0036324868891711003,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10256452142857143,
"min": 0.10256452142857143,
"max": 0.19844960000000003,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4359033,
"min": 1.4359033,
"max": 2.6108288999999996,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00026619569071428557,
"min": 0.00026619569071428557,
"max": 0.00984511504,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.003726739669999998,
"min": 0.003726739669999998,
"max": 0.12110180710999999,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.012336241081357002,
"min": 0.012336241081357002,
"max": 0.4108613431453705,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.17270737886428833,
"min": 0.17270737886428833,
"max": 3.697751998901367,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 382.25333333333333,
"min": 337.0108695652174,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 28669.0,
"min": 16488.0,
"max": 34420.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.59279471400537,
"min": -0.999987552408129,
"max": 1.6629891079729018,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 121.05239826440811,
"min": -31.999601677060127,
"max": 152.99499793350697,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.59279471400537,
"min": -0.999987552408129,
"max": 1.6629891079729018,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 121.05239826440811,
"min": -31.999601677060127,
"max": 152.99499793350697,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.04860202457884474,
"min": 0.045572998462418135,
"max": 7.056986880653045,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.6937538679922,
"min": 3.6937538679922,
"max": 119.96877697110176,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1751753908",
"python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
"command_line_arguments": "/mnt/Work/Projects/huggingface/deep-rl-class/.pixi/envs/default/bin/mlagents-learn /mnt/Work/Projects/huggingface/deep-rl-class/notebooks/unit5/ml-agents/config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.7.1+cu126",
"numpy_version": "1.23.5",
"end_time_seconds": "1751756568"
},
"total": 2659.6951590560057,
"count": 1,
"self": 0.5756995380070293,
"children": {
"run_training.setup": {
"total": 0.03354090500215534,
"count": 1,
"self": 0.03354090500215534
},
"TrainerController.start_learning": {
"total": 2659.0859186129965,
"count": 1,
"self": 1.4436060408334015,
"children": {
"TrainerController._reset_env": {
"total": 3.296384022003622,
"count": 1,
"self": 3.296384022003622
},
"TrainerController.advance": {
"total": 2654.2551208451623,
"count": 63714,
"self": 1.375572186880163,
"children": {
"env_step": {
"total": 1747.452128482415,
"count": 63714,
"self": 1547.8593896673556,
"children": {
"SubprocessEnvManager._take_step": {
"total": 198.72030910917965,
"count": 63714,
"self": 4.3418682213523425,
"children": {
"TorchPolicy.evaluate": {
"total": 194.3784408878273,
"count": 62574,
"self": 194.3784408878273
}
}
},
"workers": {
"total": 0.8724297058797674,
"count": 63714,
"self": 0.0,
"children": {
"worker_root": {
"total": 2655.7355341729854,
"count": 63714,
"is_parallel": true,
"self": 1226.1389424734953,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0032264039909932762,
"count": 1,
"is_parallel": true,
"self": 0.0009732259786687791,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002253178012324497,
"count": 8,
"is_parallel": true,
"self": 0.002253178012324497
}
}
},
"UnityEnvironment.step": {
"total": 0.046477704992867075,
"count": 1,
"is_parallel": true,
"self": 0.0006991079862928018,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000384042999939993,
"count": 1,
"is_parallel": true,
"self": 0.000384042999939993
},
"communicator.exchange": {
"total": 0.04314061100012623,
"count": 1,
"is_parallel": true,
"self": 0.04314061100012623
},
"steps_from_proto": {
"total": 0.0022539430065080523,
"count": 1,
"is_parallel": true,
"self": 0.0005181740270927548,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0017357689794152975,
"count": 8,
"is_parallel": true,
"self": 0.0017357689794152975
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1429.59659169949,
"count": 63713,
"is_parallel": true,
"self": 42.923033282655524,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 31.730604372161906,
"count": 63713,
"is_parallel": true,
"self": 31.730604372161906
},
"communicator.exchange": {
"total": 1227.3228286470257,
"count": 63713,
"is_parallel": true,
"self": 1227.3228286470257
},
"steps_from_proto": {
"total": 127.62012539764692,
"count": 63713,
"is_parallel": true,
"self": 28.40294701809762,
"children": {
"_process_rank_one_or_two_observation": {
"total": 99.2171783795493,
"count": 509704,
"is_parallel": true,
"self": 99.2171783795493
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 905.4274201758672,
"count": 63714,
"self": 2.4949394092691364,
"children": {
"process_trajectory": {
"total": 146.03519217846042,
"count": 63714,
"self": 145.825438702479,
"children": {
"RLTrainer._checkpoint": {
"total": 0.20975347598141525,
"count": 2,
"self": 0.20975347598141525
}
}
},
"_update_policy": {
"total": 756.8972885881376,
"count": 456,
"self": 351.75980494651594,
"children": {
"TorchPPOOptimizer.update": {
"total": 405.13748364162166,
"count": 22833,
"self": 405.13748364162166
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.67993400990963e-07,
"count": 1,
"self": 9.67993400990963e-07
},
"TrainerController._save_models": {
"total": 0.09080673700373154,
"count": 1,
"self": 0.001062789000570774,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08974394800316077,
"count": 1,
"self": 0.08974394800316077
}
}
}
}
}
}
}