{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.7002893686294556,
"min": 0.6959943771362305,
"max": 1.4577556848526,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 21109.5234375,
"min": 21002.326171875,
"max": 44222.4765625,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989998.0,
"min": 29878.0,
"max": 989998.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989998.0,
"min": 29878.0,
"max": 989998.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.2995430827140808,
"min": -0.1097041592001915,
"max": 0.3210127055644989,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 78.48028564453125,
"min": -26.438701629638672,
"max": 83.14228820800781,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.024050449952483177,
"min": -0.0007179793319664896,
"max": 0.4773642122745514,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 6.301218032836914,
"min": -0.18308472633361816,
"max": 113.13531494140625,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.069975801535389,
"min": 0.06391836128540775,
"max": 0.07291522097871041,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.049637023030835,
"min": 0.5053019814939912,
"max": 1.0670726490207014,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01242068731720602,
"min": 0.0002721593999212204,
"max": 0.018423158639781206,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.1863103097580903,
"min": 0.003538072198975865,
"max": 0.1863103097580903,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.601437466220002e-06,
"min": 7.601437466220002e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00011402156199330004,
"min": 0.00011402156199330004,
"max": 0.003633570788809799,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10253378,
"min": 0.10253378,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5380067000000002,
"min": 1.3886848,
"max": 2.6111902000000002,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.000263124622,
"min": 0.000263124622,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.00394686933,
"min": 0.00394686933,
"max": 0.12113790098,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.012630349025130272,
"min": 0.012630349025130272,
"max": 0.5004786849021912,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.18945524096488953,
"min": 0.1786191314458847,
"max": 3.5033507347106934,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 542.0181818181818,
"min": 492.14285714285717,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29811.0,
"min": 16805.0,
"max": 32903.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.2397526940161532,
"min": -0.9998387612642781,
"max": 1.2934963973239064,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 68.18639817088842,
"min": -30.99500159919262,
"max": 72.43579825013876,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.2397526940161532,
"min": -0.9998387612642781,
"max": 1.2934963973239064,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 68.18639817088842,
"min": -30.99500159919262,
"max": 72.43579825013876,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.06996654304675758,
"min": 0.06685642639266527,
"max": 9.777457848191261,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.848159867571667,
"min": 3.743959877989255,
"max": 166.21678341925144,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1699820444",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.0+cu118",
"numpy_version": "1.23.5",
"end_time_seconds": "1699822438"
},
"total": 1994.294518087,
"count": 1,
"self": 0.5311031159997128,
"children": {
"run_training.setup": {
"total": 0.04041239000002861,
"count": 1,
"self": 0.04041239000002861
},
"TrainerController.start_learning": {
"total": 1993.7230025810002,
"count": 1,
"self": 1.2563343350091145,
"children": {
"TrainerController._reset_env": {
"total": 3.818374812000002,
"count": 1,
"self": 3.818374812000002
},
"TrainerController.advance": {
"total": 1988.5663573359911,
"count": 63353,
"self": 1.3811684280497047,
"children": {
"env_step": {
"total": 1349.8789710549916,
"count": 63353,
"self": 1223.026211185944,
"children": {
"SubprocessEnvManager._take_step": {
"total": 126.08334574603623,
"count": 63353,
"self": 4.499536273989861,
"children": {
"TorchPolicy.evaluate": {
"total": 121.58380947204637,
"count": 62556,
"self": 121.58380947204637
}
}
},
"workers": {
"total": 0.7694141230113019,
"count": 63353,
"self": 0.0,
"children": {
"worker_root": {
"total": 1989.4029796299928,
"count": 63353,
"is_parallel": true,
"self": 879.043400398994,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0017906020000282297,
"count": 1,
"is_parallel": true,
"self": 0.0005886009996629582,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012020010003652715,
"count": 8,
"is_parallel": true,
"self": 0.0012020010003652715
}
}
},
"UnityEnvironment.step": {
"total": 0.07328977899987876,
"count": 1,
"is_parallel": true,
"self": 0.0005642389996864949,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00045317700005398365,
"count": 1,
"is_parallel": true,
"self": 0.00045317700005398365
},
"communicator.exchange": {
"total": 0.0706147430000783,
"count": 1,
"is_parallel": true,
"self": 0.0706147430000783
},
"steps_from_proto": {
"total": 0.0016576200000599783,
"count": 1,
"is_parallel": true,
"self": 0.00036556199984261184,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012920580002173665,
"count": 8,
"is_parallel": true,
"self": 0.0012920580002173665
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1110.3595792309989,
"count": 63352,
"is_parallel": true,
"self": 33.659349206088564,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.749531602982643,
"count": 63352,
"is_parallel": true,
"self": 22.749531602982643
},
"communicator.exchange": {
"total": 959.7732286329592,
"count": 63352,
"is_parallel": true,
"self": 959.7732286329592
},
"steps_from_proto": {
"total": 94.17746978896844,
"count": 63352,
"is_parallel": true,
"self": 18.25106498495984,
"children": {
"_process_rank_one_or_two_observation": {
"total": 75.9264048040086,
"count": 506816,
"is_parallel": true,
"self": 75.9264048040086
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 637.3062178529499,
"count": 63353,
"self": 2.3171692389737473,
"children": {
"process_trajectory": {
"total": 119.27919398997915,
"count": 63353,
"self": 119.1162790539795,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1629149359996518,
"count": 2,
"self": 0.1629149359996518
}
}
},
"_update_policy": {
"total": 515.709854623997,
"count": 448,
"self": 311.19620009402774,
"children": {
"TorchPPOOptimizer.update": {
"total": 204.51365452996924,
"count": 22773,
"self": 204.51365452996924
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.0440003279654775e-06,
"count": 1,
"self": 1.0440003279654775e-06
},
"TrainerController._save_models": {
"total": 0.08193505399958667,
"count": 1,
"self": 0.001434279999557475,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0805007740000292,
"count": 1,
"self": 0.0805007740000292
}
}
}
}
}
}
}