{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.6487331390380859,
"min": 0.6220056414604187,
"max": 1.3576252460479736,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 19337.4375,
"min": 18769.642578125,
"max": 41184.91796875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989887.0,
"min": 29952.0,
"max": 989887.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989887.0,
"min": 29952.0,
"max": 989887.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.3864476680755615,
"min": -0.1926925629377365,
"max": 0.393039733171463,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 100.47639465332031,
"min": -45.66813659667969,
"max": 103.36945343017578,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.020981986075639725,
"min": -0.0027117240242660046,
"max": 0.316057413816452,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 5.455316543579102,
"min": -0.7240303158760071,
"max": 75.8537826538086,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.07066173525397769,
"min": 0.06589075218402622,
"max": 0.07333172318924751,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0599260288096652,
"min": 0.4947127919000488,
"max": 1.0599260288096652,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013733411635944827,
"min": 0.0007454511777183928,
"max": 0.014683335908644258,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.2060011745391724,
"min": 0.008945414132620714,
"max": 0.2060011745391724,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.466197511300003e-06,
"min": 7.466197511300003e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00011199296266950004,
"min": 0.00011199296266950004,
"max": 0.003507585230805,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1024887,
"min": 0.1024887,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5373305,
"min": 1.3886848,
"max": 2.569195,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00025862113000000013,
"min": 0.00025862113000000013,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0038793169500000017,
"min": 0.0038793169500000017,
"max": 0.11694258050000003,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.008520117960870266,
"min": 0.008520117960870266,
"max": 0.39752912521362305,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.127801775932312,
"min": 0.12236807495355606,
"max": 2.7827038764953613,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 442.07936507936506,
"min": 437.61764705882354,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 27851.0,
"min": 15984.0,
"max": 31953.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.335638069089443,
"min": -1.0000000521540642,
"max": 1.4741117480923147,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 84.1451983526349,
"min": -29.8662016838789,
"max": 100.2395988702774,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.335638069089443,
"min": -1.0000000521540642,
"max": 1.4741117480923147,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 84.1451983526349,
"min": -29.8662016838789,
"max": 100.2395988702774,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.03916257257214821,
"min": 0.03916257257214821,
"max": 7.785609431564808,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.4672420720453374,
"min": 2.4672420720453374,
"max": 124.56975090503693,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1747779324",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.7.0+cu126",
"numpy_version": "1.23.5",
"end_time_seconds": "1747781530"
},
"total": 2206.399382074,
"count": 1,
"self": 0.47574324500010334,
"children": {
"run_training.setup": {
"total": 0.01989345099991624,
"count": 1,
"self": 0.01989345099991624
},
"TrainerController.start_learning": {
"total": 2205.903745378,
"count": 1,
"self": 1.5944481350315982,
"children": {
"TrainerController._reset_env": {
"total": 2.3921712650001155,
"count": 1,
"self": 2.3921712650001155
},
"TrainerController.advance": {
"total": 2201.8348122139682,
"count": 63475,
"self": 1.6490977249127354,
"children": {
"env_step": {
"total": 1519.6180689670325,
"count": 63475,
"self": 1355.214760219033,
"children": {
"SubprocessEnvManager._take_step": {
"total": 163.49947352997037,
"count": 63475,
"self": 4.862321581969127,
"children": {
"TorchPolicy.evaluate": {
"total": 158.63715194800125,
"count": 62566,
"self": 158.63715194800125
}
}
},
"workers": {
"total": 0.903835218029144,
"count": 63475,
"self": 0.0,
"children": {
"worker_root": {
"total": 2200.937781400061,
"count": 63475,
"is_parallel": true,
"self": 963.6050267800872,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001869107999937114,
"count": 1,
"is_parallel": true,
"self": 0.000625348000539816,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001243759999397298,
"count": 8,
"is_parallel": true,
"self": 0.001243759999397298
}
}
},
"UnityEnvironment.step": {
"total": 0.048598670000046695,
"count": 1,
"is_parallel": true,
"self": 0.0005683329998191766,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000486070000079053,
"count": 1,
"is_parallel": true,
"self": 0.000486070000079053
},
"communicator.exchange": {
"total": 0.04591838300007112,
"count": 1,
"is_parallel": true,
"self": 0.04591838300007112
},
"steps_from_proto": {
"total": 0.001625884000077349,
"count": 1,
"is_parallel": true,
"self": 0.00037470500046765665,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012511789996096923,
"count": 8,
"is_parallel": true,
"self": 0.0012511789996096923
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1237.3327546199737,
"count": 63474,
"is_parallel": true,
"self": 32.74811553398763,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 23.062777600015806,
"count": 63474,
"is_parallel": true,
"self": 23.062777600015806
},
"communicator.exchange": {
"total": 1085.3611154419498,
"count": 63474,
"is_parallel": true,
"self": 1085.3611154419498
},
"steps_from_proto": {
"total": 96.16074604402047,
"count": 63474,
"is_parallel": true,
"self": 19.87558012192767,
"children": {
"_process_rank_one_or_two_observation": {
"total": 76.2851659220928,
"count": 507792,
"is_parallel": true,
"self": 76.2851659220928
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 680.5676455220232,
"count": 63475,
"self": 2.9541124660845526,
"children": {
"process_trajectory": {
"total": 128.22411853293943,
"count": 63475,
"self": 128.02729025393955,
"children": {
"RLTrainer._checkpoint": {
"total": 0.19682827899987387,
"count": 2,
"self": 0.19682827899987387
}
}
},
"_update_policy": {
"total": 549.3894145229992,
"count": 448,
"self": 303.06348119796417,
"children": {
"TorchPPOOptimizer.update": {
"total": 246.32593332503507,
"count": 22824,
"self": 246.32593332503507
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.860000318440143e-07,
"count": 1,
"self": 9.860000318440143e-07
},
"TrainerController._save_models": {
"total": 0.08231277799995951,
"count": 1,
"self": 0.0014425479998863011,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0808702300000732,
"count": 1,
"self": 0.0808702300000732
}
}
}
}
}
}
}