{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.3328269422054291,
"min": 0.3292362093925476,
"max": 0.3599614202976227,
"count": 5
},
"Pyramids.Policy.Entropy.sum": {
"value": 9984.80859375,
"min": 3898.643798828125,
"max": 10752.767578125,
"count": 5
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 322.57608695652175,
"min": 232.48387096774192,
"max": 330.46153846153845,
"count": 5
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29677.0,
"min": 7207.0,
"max": 30832.0,
"count": 5
},
"Pyramids.Step.mean": {
"value": 1379891.0,
"min": 1259962.0,
"max": 1379891.0,
"count": 5
},
"Pyramids.Step.sum": {
"value": 1379891.0,
"min": 1259962.0,
"max": 1379891.0,
"count": 5
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.6158922910690308,
"min": 0.5918439626693726,
"max": 0.7013688087463379,
"count": 5
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 168.75448608398438,
"min": 57.81498336791992,
"max": 205.50106811523438,
"count": 5
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.00969297531992197,
"min": -0.036272455006837845,
"max": 0.00969297531992197,
"count": 5
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 2.6558752059936523,
"min": -5.330055236816406,
"max": 2.6558752059936523,
"count": 5
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.6121956317321113,
"min": 1.6035845981358172,
"max": 1.7675161251137335,
"count": 5
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 148.32199811935425,
"min": 54.792999878525734,
"max": 175.1673981845379,
"count": 5
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.6121956317321113,
"min": 1.6035845981358172,
"max": 1.7675161251137335,
"count": 5
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 148.32199811935425,
"min": 54.792999878525734,
"max": 175.1673981845379,
"count": 5
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.04354281095275719,
"min": 0.03256342801520781,
"max": 0.04500835418174584,
"count": 5
},
"Pyramids.Policy.RndReward.sum": {
"value": 4.005938607653661,
"min": 1.0094662684714422,
"max": 4.332612534111831,
"count": 5
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06888716422309657,
"min": 0.06757179921745167,
"max": 0.06888716422309657,
"count": 5
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9644202991233519,
"min": 0.2702871968698067,
"max": 1.017949348965582,
"count": 5
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013390507737529422,
"min": 0.013355786696289822,
"max": 0.014255686707439876,
"count": 5
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.1874671083254119,
"min": 0.054010328371077776,
"max": 0.21077887569360124,
"count": 5
},
"Pyramids.Policy.LearningRate.mean": {
"value": 2.701789099406667e-05,
"min": 2.701789099406667e-05,
"max": 4.893973368678333e-05,
"count": 5
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00037825047391693337,
"min": 0.00019575893474713333,
"max": 0.0006745104751636,
"count": 5
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10900593333333333,
"min": 0.10900593333333333,
"max": 0.11631321666666666,
"count": 5
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5260830666666667,
"min": 0.46525286666666665,
"max": 1.7248364,
"count": 5
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0009096927400000003,
"min": 0.0009096927400000003,
"max": 0.0016396903450000002,
"count": 5
},
"Pyramids.Policy.Beta.sum": {
"value": 0.012735698360000004,
"min": 0.006558761380000001,
"max": 0.022611156360000006,
"count": 5
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.013081449083983898,
"min": 0.013081449083983898,
"max": 0.013701974414288998,
"count": 5
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.18314029276371002,
"min": 0.0545964390039444,
"max": 0.2055296152830124,
"count": 5
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 5
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1756997345",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.8.0+cu128",
"numpy_version": "1.23.5",
"end_time_seconds": "1756997698"
},
"total": 353.027487286,
"count": 1,
"self": 0.448277577999761,
"children": {
"run_training.setup": {
"total": 0.021634278999954404,
"count": 1,
"self": 0.021634278999954404
},
"TrainerController.start_learning": {
"total": 352.5575754290003,
"count": 1,
"self": 0.17634569501478836,
"children": {
"TrainerController._reset_env": {
"total": 2.09601739000027,
"count": 1,
"self": 2.09601739000027
},
"TrainerController.advance": {
"total": 350.28426735698486,
"count": 9706,
"self": 0.18153609004002647,
"children": {
"env_step": {
"total": 255.4942181939623,
"count": 9706,
"self": 235.39306379204572,
"children": {
"SubprocessEnvManager._take_step": {
"total": 19.992226117946302,
"count": 9706,
"self": 0.642642519893343,
"children": {
"TorchPolicy.evaluate": {
"total": 19.34958359805296,
"count": 9337,
"self": 19.34958359805296
}
}
},
"workers": {
"total": 0.10892828397027188,
"count": 9705,
"self": 0.0,
"children": {
"worker_root": {
"total": 351.95323861191355,
"count": 9705,
"is_parallel": true,
"self": 132.47989358791074,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0020756940002684132,
"count": 1,
"is_parallel": true,
"self": 0.0006418019993361668,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014338920009322464,
"count": 8,
"is_parallel": true,
"self": 0.0014338920009322464
}
}
},
"UnityEnvironment.step": {
"total": 0.04797280300044804,
"count": 1,
"is_parallel": true,
"self": 0.0004953019997628871,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00046361600016098237,
"count": 1,
"is_parallel": true,
"self": 0.00046361600016098237
},
"communicator.exchange": {
"total": 0.045392280000669416,
"count": 1,
"is_parallel": true,
"self": 0.045392280000669416
},
"steps_from_proto": {
"total": 0.0016216049998547533,
"count": 1,
"is_parallel": true,
"self": 0.0003517840004860773,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001269820999368676,
"count": 8,
"is_parallel": true,
"self": 0.001269820999368676
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 219.4733450240028,
"count": 9704,
"is_parallel": true,
"self": 4.5904916139415946,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 3.1841592779810526,
"count": 9704,
"is_parallel": true,
"self": 3.1841592779810526
},
"communicator.exchange": {
"total": 198.2160626910627,
"count": 9704,
"is_parallel": true,
"self": 198.2160626910627
},
"steps_from_proto": {
"total": 13.482631441017475,
"count": 9704,
"is_parallel": true,
"self": 2.5699725949016283,
"children": {
"_process_rank_one_or_two_observation": {
"total": 10.912658846115846,
"count": 77632,
"is_parallel": true,
"self": 10.912658846115846
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 94.60851307298253,
"count": 9705,
"self": 0.37175510995894,
"children": {
"process_trajectory": {
"total": 17.921697307027898,
"count": 9705,
"self": 17.921697307027898
},
"_update_policy": {
"total": 76.3150606559957,
"count": 70,
"self": 43.130905895998694,
"children": {
"TorchPPOOptimizer.update": {
"total": 33.184154759997,
"count": 3363,
"self": 33.184154759997
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.5040004655020311e-06,
"count": 1,
"self": 1.5040004655020311e-06
},
"TrainerController._save_models": {
"total": 0.0009434829999008798,
"count": 1,
"self": 2.3206000150821637e-05,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0009202769997500582,
"count": 1,
"self": 0.0009202769997500582
}
}
}
}
}
}
}