{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.7249734401702881,
"min": 0.7249734401702881,
"max": 2.8646671772003174,
"count": 50
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 7408.50341796875,
"min": 6643.49365234375,
"max": 30448.5078125,
"count": 50
},
"SnowballTarget.Step.mean": {
"value": 499912.0,
"min": 9952.0,
"max": 499912.0,
"count": 50
},
"SnowballTarget.Step.sum": {
"value": 499912.0,
"min": 9952.0,
"max": 499912.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.728642463684082,
"min": 0.30740585923194885,
"max": 13.887354850769043,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 1386.5928955078125,
"min": 29.81836700439453,
"max": 1421.425537109375,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 8756.0,
"min": 8756.0,
"max": 10945.0,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 26.452830188679247,
"min": 3.3636363636363638,
"max": 27.347826086956523,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1402.0,
"min": 148.0,
"max": 1503.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 26.452830188679247,
"min": 3.3636363636363638,
"max": 27.347826086956523,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1402.0,
"min": 148.0,
"max": 1503.0,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.06120750745050792,
"min": 0.05969955955926494,
"max": 0.07628246723399178,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.30603753725253957,
"min": 0.24051313562114385,
"max": 0.3750896338344818,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.18039818657379522,
"min": 0.1220061824492672,
"max": 0.28672271221876144,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.9019909328689761,
"min": 0.4880247297970688,
"max": 1.4238238658975155,
"count": 50
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 3.000099000000003e-06,
"min": 3.000099000000003e-06,
"max": 0.0002967000011,
"count": 50
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 1.5000495000000015e-05,
"min": 1.5000495000000015e-05,
"max": 0.0014538000154,
"count": 50
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.101,
"min": 0.101,
"max": 0.19890000000000002,
"count": 50
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.505,
"min": 0.41192000000000006,
"max": 0.9846000000000001,
"count": 50
},
"SnowballTarget.Policy.Beta.mean": {
"value": 5.9900000000000047e-05,
"min": 5.9900000000000047e-05,
"max": 0.00494511,
"count": 50
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.00029950000000000023,
"min": 0.00029950000000000023,
"max": 0.024231540000000003,
"count": 50
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1674128348",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1674129461"
},
"total": 1113.444466723,
"count": 1,
"self": 0.39341124200018385,
"children": {
"run_training.setup": {
"total": 0.11675301299965213,
"count": 1,
"self": 0.11675301299965213
},
"TrainerController.start_learning": {
"total": 1112.9343024680002,
"count": 1,
"self": 1.4966997059595997,
"children": {
"TrainerController._reset_env": {
"total": 9.446732872000212,
"count": 1,
"self": 9.446732872000212
},
"TrainerController.advance": {
"total": 1101.8514318070406,
"count": 45536,
"self": 0.7379781470458511,
"children": {
"env_step": {
"total": 1101.1134536599948,
"count": 45536,
"self": 694.9175112369485,
"children": {
"SubprocessEnvManager._take_step": {
"total": 405.4819000740554,
"count": 45536,
"self": 3.896643957020842,
"children": {
"TorchPolicy.evaluate": {
"total": 401.58525611703453,
"count": 45536,
"self": 86.68116245693363,
"children": {
"TorchPolicy.sample_actions": {
"total": 314.9040936601009,
"count": 45536,
"self": 314.9040936601009
}
}
}
}
},
"workers": {
"total": 0.7140423489909153,
"count": 45536,
"self": 0.0,
"children": {
"worker_root": {
"total": 1109.378210641014,
"count": 45536,
"is_parallel": true,
"self": 518.2196938450347,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00602965200005201,
"count": 1,
"is_parallel": true,
"self": 0.003495207000469236,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0025344449995827745,
"count": 10,
"is_parallel": true,
"self": 0.0025344449995827745
}
}
},
"UnityEnvironment.step": {
"total": 0.03881331799993859,
"count": 1,
"is_parallel": true,
"self": 0.0028520179998849926,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00048024199986684835,
"count": 1,
"is_parallel": true,
"self": 0.00048024199986684835
},
"communicator.exchange": {
"total": 0.03337929100007386,
"count": 1,
"is_parallel": true,
"self": 0.03337929100007386
},
"steps_from_proto": {
"total": 0.0021017670001128863,
"count": 1,
"is_parallel": true,
"self": 0.0004719510002360039,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0016298159998768824,
"count": 10,
"is_parallel": true,
"self": 0.0016298159998768824
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 591.1585167959793,
"count": 45535,
"is_parallel": true,
"self": 22.15656391203629,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 13.919602987965845,
"count": 45535,
"is_parallel": true,
"self": 13.919602987965845
},
"communicator.exchange": {
"total": 464.1769297260089,
"count": 45535,
"is_parallel": true,
"self": 464.1769297260089
},
"steps_from_proto": {
"total": 90.90542016996824,
"count": 45535,
"is_parallel": true,
"self": 18.446465295910002,
"children": {
"_process_rank_one_or_two_observation": {
"total": 72.45895487405824,
"count": 455350,
"is_parallel": true,
"self": 72.45895487405824
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.565399967759731e-05,
"count": 1,
"self": 4.565399967759731e-05,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 1095.6659048388278,
"count": 739456,
"is_parallel": true,
"self": 20.594137157896057,
"children": {
"process_trajectory": {
"total": 517.7519642639313,
"count": 739456,
"is_parallel": true,
"self": 515.906177694932,
"children": {
"RLTrainer._checkpoint": {
"total": 1.8457865689993014,
"count": 10,
"is_parallel": true,
"self": 1.8457865689993014
}
}
},
"_update_policy": {
"total": 557.3198034170005,
"count": 227,
"is_parallel": true,
"self": 115.1290086940262,
"children": {
"TorchPPOOptimizer.update": {
"total": 442.19079472297426,
"count": 11574,
"is_parallel": true,
"self": 442.19079472297426
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.1393924290000541,
"count": 1,
"self": 0.0008527049999429437,
"children": {
"RLTrainer._checkpoint": {
"total": 0.13853972400011116,
"count": 1,
"self": 0.13853972400011116
}
}
}
}
}
}
}