saoliuhong's picture
FirstPush pyramids
1e9b56f verified
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.4314938485622406,
"min": 0.4202772080898285,
"max": 1.4853843450546265,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 12965.52734375,
"min": 12601.591796875,
"max": 45060.62109375,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989946.0,
"min": 29952.0,
"max": 989946.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989946.0,
"min": 29952.0,
"max": 989946.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.4056844413280487,
"min": -0.12622413039207458,
"max": 0.4568139612674713,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 108.72342681884766,
"min": -30.420015335083008,
"max": 123.33976745605469,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.032088108360767365,
"min": -0.012668116018176079,
"max": 0.2522522509098053,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 8.599613189697266,
"min": -3.2810420989990234,
"max": 60.54054260253906,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06754930215399889,
"min": 0.06519511177495675,
"max": 0.0763235232451195,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9456902301559845,
"min": 0.4935783272988468,
"max": 1.028994964377489,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.0137687144098371,
"min": 0.00041965674628488336,
"max": 0.0137687144098371,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.1927620017377194,
"min": 0.005455537701703483,
"max": 0.1927620017377194,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.2693975768999995e-06,
"min": 7.2693975768999995e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0001017715660766,
"min": 0.0001017715660766,
"max": 0.003508048430650599,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1024231,
"min": 0.1024231,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4339234,
"min": 1.3691136000000002,
"max": 2.5693494,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00025206769000000003,
"min": 0.00025206769000000003,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0035289476600000006,
"min": 0.0035289476600000006,
"max": 0.11695800506,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.009492260403931141,
"min": 0.009492260403931141,
"max": 0.45413970947265625,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.13289164006710052,
"min": 0.13289164006710052,
"max": 3.1789779663085938,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 448.36764705882354,
"min": 416.875,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30489.0,
"min": 15984.0,
"max": 32633.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.3216289620900499,
"min": -1.0000000521540642,
"max": 1.524261946829272,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 91.19239838421345,
"min": -32.000001668930054,
"max": 108.22259822487831,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.3216289620900499,
"min": -1.0000000521540642,
"max": 1.524261946829272,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 91.19239838421345,
"min": -32.000001668930054,
"max": 108.22259822487831,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.04475574411343837,
"min": 0.04344303533819161,
"max": 9.99684015288949,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.0881463438272476,
"min": 2.905700965449796,
"max": 159.94944244623184,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1706107106",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.2+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1706109049"
},
"total": 1942.8102867409998,
"count": 1,
"self": 0.4897458599998572,
"children": {
"run_training.setup": {
"total": 0.04681848800009902,
"count": 1,
"self": 0.04681848800009902
},
"TrainerController.start_learning": {
"total": 1942.2737223929998,
"count": 1,
"self": 1.229054410048775,
"children": {
"TrainerController._reset_env": {
"total": 2.057732993999707,
"count": 1,
"self": 2.057732993999707
},
"TrainerController.advance": {
"total": 1938.9048683149522,
"count": 63412,
"self": 1.327224587065757,
"children": {
"env_step": {
"total": 1339.4808646270503,
"count": 63412,
"self": 1218.553998599863,
"children": {
"SubprocessEnvManager._take_step": {
"total": 120.181522234162,
"count": 63412,
"self": 4.431840186038244,
"children": {
"TorchPolicy.evaluate": {
"total": 115.74968204812376,
"count": 62557,
"self": 115.74968204812376
}
}
},
"workers": {
"total": 0.7453437930253131,
"count": 63412,
"self": 0.0,
"children": {
"worker_root": {
"total": 1937.55002456484,
"count": 63412,
"is_parallel": true,
"self": 825.7675909877798,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00223433799965278,
"count": 1,
"is_parallel": true,
"self": 0.0006270060011956957,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0016073319984570844,
"count": 8,
"is_parallel": true,
"self": 0.0016073319984570844
}
}
},
"UnityEnvironment.step": {
"total": 0.05124253299982229,
"count": 1,
"is_parallel": true,
"self": 0.0005981780004731263,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00048091099961311556,
"count": 1,
"is_parallel": true,
"self": 0.00048091099961311556
},
"communicator.exchange": {
"total": 0.04852933599977405,
"count": 1,
"is_parallel": true,
"self": 0.04852933599977405
},
"steps_from_proto": {
"total": 0.0016341079999619978,
"count": 1,
"is_parallel": true,
"self": 0.0003459529989413568,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001288155001020641,
"count": 8,
"is_parallel": true,
"self": 0.001288155001020641
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1111.7824335770601,
"count": 63411,
"is_parallel": true,
"self": 34.31404999100323,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 23.84007114607175,
"count": 63411,
"is_parallel": true,
"self": 23.84007114607175
},
"communicator.exchange": {
"total": 958.5466695599998,
"count": 63411,
"is_parallel": true,
"self": 958.5466695599998
},
"steps_from_proto": {
"total": 95.08164287998534,
"count": 63411,
"is_parallel": true,
"self": 18.490796324655093,
"children": {
"_process_rank_one_or_two_observation": {
"total": 76.59084655533024,
"count": 507288,
"is_parallel": true,
"self": 76.59084655533024
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 598.0967791008361,
"count": 63412,
"self": 2.2720186626911527,
"children": {
"process_trajectory": {
"total": 117.47575026514187,
"count": 63412,
"self": 117.28907509814235,
"children": {
"RLTrainer._checkpoint": {
"total": 0.18667516699952102,
"count": 2,
"self": 0.18667516699952102
}
}
},
"_update_policy": {
"total": 478.3490101730031,
"count": 442,
"self": 280.7663611540056,
"children": {
"TorchPPOOptimizer.update": {
"total": 197.58264901899747,
"count": 22863,
"self": 197.58264901899747
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.849991329247132e-07,
"count": 1,
"self": 9.849991329247132e-07
},
"TrainerController._save_models": {
"total": 0.0820656890000464,
"count": 1,
"self": 0.0013846769998053787,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08068101200024103,
"count": 1,
"self": 0.08068101200024103
}
}
}
}
}
}
}