{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.5176378488540649,
"min": 0.49378541111946106,
"max": 2.845227003097534,
"count": 40
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 5284.04736328125,
"min": 4692.9365234375,
"max": 29044.078125,
"count": 40
},
"SnowballTarget.Step.mean": {
"value": 399992.0,
"min": 9952.0,
"max": 399992.0,
"count": 40
},
"SnowballTarget.Step.sum": {
"value": 399992.0,
"min": 9952.0,
"max": 399992.0,
"count": 40
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.329519271850586,
"min": 0.3994121253490448,
"max": 13.483415603637695,
"count": 40
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 2599.25634765625,
"min": 77.48595428466797,
"max": 2756.82861328125,
"count": 40
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.07134130888420366,
"min": 0.061226224523338986,
"max": 0.07378290321018235,
"count": 40
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.28536523553681464,
"min": 0.24490489809335594,
"max": 0.3655463208669258,
"count": 40
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.18573323922122226,
"min": 0.1251115050490982,
"max": 0.26207093999666325,
"count": 40
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.742932956884889,
"min": 0.5004460201963928,
"max": 1.3103546999833162,
"count": 40
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 3.891098702999994e-06,
"min": 3.891098702999994e-06,
"max": 0.000295941001353,
"count": 40
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 1.5564394811999976e-05,
"min": 1.5564394811999976e-05,
"max": 0.0014425800191399996,
"count": 40
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.101297,
"min": 0.101297,
"max": 0.19864700000000002,
"count": 40
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.405188,
"min": 0.405188,
"max": 0.98086,
"count": 40
},
"SnowballTarget.Policy.Beta.mean": {
"value": 7.472029999999992e-05,
"min": 7.472029999999992e-05,
"max": 0.004932485299999999,
"count": 40
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.00029888119999999966,
"min": 0.00029888119999999966,
"max": 0.024044914,
"count": 40
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 40
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 8756.0,
"min": 8756.0,
"max": 10945.0,
"count": 40
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 26.136363636363637,
"min": 3.522727272727273,
"max": 26.4,
"count": 40
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1150.0,
"min": 155.0,
"max": 1452.0,
"count": 40
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 26.136363636363637,
"min": 3.522727272727273,
"max": 26.4,
"count": 40
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1150.0,
"min": 155.0,
"max": 1452.0,
"count": 40
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1742197226",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.6.0+cu124",
"numpy_version": "1.23.5",
"end_time_seconds": "1742198180"
},
"total": 953.8279832569999,
"count": 1,
"self": 0.43778020299987475,
"children": {
"run_training.setup": {
"total": 0.023914718000014545,
"count": 1,
"self": 0.023914718000014545
},
"TrainerController.start_learning": {
"total": 953.366288336,
"count": 1,
"self": 0.9285308879786953,
"children": {
"TrainerController._reset_env": {
"total": 3.204070942000044,
"count": 1,
"self": 3.204070942000044
},
"TrainerController.advance": {
"total": 949.1391320300211,
"count": 36392,
"self": 0.949890618009249,
"children": {
"env_step": {
"total": 686.1271063849961,
"count": 36392,
"self": 522.1558481460359,
"children": {
"SubprocessEnvManager._take_step": {
"total": 163.42537430795983,
"count": 36392,
"self": 3.03818298997453,
"children": {
"TorchPolicy.evaluate": {
"total": 160.3871913179853,
"count": 36392,
"self": 160.3871913179853
}
}
},
"workers": {
"total": 0.5458839310003896,
"count": 36392,
"self": 0.0,
"children": {
"worker_root": {
"total": 950.4496576520266,
"count": 36392,
"is_parallel": true,
"self": 495.1256380730458,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.006921033000026,
"count": 1,
"is_parallel": true,
"self": 0.0052307819998986815,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0016902510001273185,
"count": 10,
"is_parallel": true,
"self": 0.0016902510001273185
}
}
},
"UnityEnvironment.step": {
"total": 0.03622114900008455,
"count": 1,
"is_parallel": true,
"self": 0.0006235419998574798,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004439660001480661,
"count": 1,
"is_parallel": true,
"self": 0.0004439660001480661
},
"communicator.exchange": {
"total": 0.033114217000047574,
"count": 1,
"is_parallel": true,
"self": 0.033114217000047574
},
"steps_from_proto": {
"total": 0.002039424000031431,
"count": 1,
"is_parallel": true,
"self": 0.00042866100011451636,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0016107629999169149,
"count": 10,
"is_parallel": true,
"self": 0.0016107629999169149
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 455.3240195789808,
"count": 36391,
"is_parallel": true,
"self": 21.69445709602701,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 12.078641510985562,
"count": 36391,
"is_parallel": true,
"self": 12.078641510985562
},
"communicator.exchange": {
"total": 349.91225058399414,
"count": 36391,
"is_parallel": true,
"self": 349.91225058399414
},
"steps_from_proto": {
"total": 71.63867038797412,
"count": 36391,
"is_parallel": true,
"self": 13.315650872966216,
"children": {
"_process_rank_one_or_two_observation": {
"total": 58.3230195150079,
"count": 363910,
"is_parallel": true,
"self": 58.3230195150079
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 262.06213502701576,
"count": 36392,
"self": 1.1852753800205846,
"children": {
"process_trajectory": {
"total": 58.84780541299824,
"count": 36392,
"self": 57.92293850599822,
"children": {
"RLTrainer._checkpoint": {
"total": 0.9248669070000233,
"count": 8,
"self": 0.9248669070000233
}
}
},
"_update_policy": {
"total": 202.02905423399693,
"count": 181,
"self": 80.42472114601128,
"children": {
"TorchPPOOptimizer.update": {
"total": 121.60433308798565,
"count": 9228,
"self": 121.60433308798565
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.55000359681435e-07,
"count": 1,
"self": 9.55000359681435e-07
},
"TrainerController._save_models": {
"total": 0.09455352099985248,
"count": 1,
"self": 0.0012478539997573534,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09330566700009513,
"count": 1,
"self": 0.09330566700009513
}
}
}
}
}
}
}