{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.3074534237384796,
"min": 0.304328054189682,
"max": 1.4217654466629028,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 9326.9072265625,
"min": 9090.8876953125,
"max": 43130.67578125,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989989.0,
"min": 29952.0,
"max": 989989.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989989.0,
"min": 29952.0,
"max": 989989.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.6338394284248352,
"min": -0.08499982953071594,
"max": 0.6441158652305603,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 181.278076171875,
"min": -20.48495864868164,
"max": 181.278076171875,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.03823057562112808,
"min": -0.03823057562112808,
"max": 0.5688163638114929,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -10.933944702148438,
"min": -10.933944702148438,
"max": 134.80947875976562,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06708642212297057,
"min": 0.06505055476390267,
"max": 0.07564832314952889,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.939209909721588,
"min": 0.5295382620467023,
"max": 1.0552130969784534,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.016473313522597198,
"min": 0.0010617629750453432,
"max": 0.016601059154102888,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.23062638931636079,
"min": 0.013802918675589462,
"max": 0.2337094034980206,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.338097554000003e-06,
"min": 7.338097554000003e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010273336575600004,
"min": 0.00010273336575600004,
"max": 0.003757779147407,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.102446,
"min": 0.102446,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4342439999999999,
"min": 1.3886848,
"max": 2.652593,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00025435540000000003,
"min": 0.00025435540000000003,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0035609756000000007,
"min": 0.0035609756000000007,
"max": 0.1252740407,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.015700189396739006,
"min": 0.015511236153542995,
"max": 0.6338075399398804,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.21980266273021698,
"min": 0.21980266273021698,
"max": 4.436652660369873,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 285.74257425742576,
"min": 285.74257425742576,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 28860.0,
"min": 15984.0,
"max": 33301.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.674637607713737,
"min": -1.0000000521540642,
"max": 1.679838697195694,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 169.13839837908745,
"min": -28.327601701021194,
"max": 169.13839837908745,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.674637607713737,
"min": -1.0000000521540642,
"max": 1.679838697195694,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 169.13839837908745,
"min": -28.327601701021194,
"max": 169.13839837908745,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.046743068348882616,
"min": 0.046743068348882616,
"max": 12.49155312217772,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 4.7210499032371445,
"min": 4.4826953680603765,
"max": 199.86484995484352,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1684941555",
"python_version": "3.10.11 (main, Apr 5 2023, 14:15:10) [GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1684943916"
},
"total": 2361.082977588,
"count": 1,
"self": 0.5268593840005451,
"children": {
"run_training.setup": {
"total": 0.060949369999889313,
"count": 1,
"self": 0.060949369999889313
},
"TrainerController.start_learning": {
"total": 2360.4951688339997,
"count": 1,
"self": 1.4192540480030402,
"children": {
"TrainerController._reset_env": {
"total": 4.640967050000199,
"count": 1,
"self": 4.640967050000199
},
"TrainerController.advance": {
"total": 2354.337002139997,
"count": 64121,
"self": 1.4004121871294046,
"children": {
"env_step": {
"total": 1685.2040359429811,
"count": 64121,
"self": 1570.406823083979,
"children": {
"SubprocessEnvManager._take_step": {
"total": 113.9602362670089,
"count": 64121,
"self": 5.04118202996051,
"children": {
"TorchPolicy.evaluate": {
"total": 108.91905423704839,
"count": 62555,
"self": 108.91905423704839
}
}
},
"workers": {
"total": 0.8369765919933343,
"count": 64121,
"self": 0.0,
"children": {
"worker_root": {
"total": 2355.0968121679653,
"count": 64121,
"is_parallel": true,
"self": 901.9391971760228,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0031184549998215516,
"count": 1,
"is_parallel": true,
"self": 0.0011016139997082064,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0020168410001133452,
"count": 8,
"is_parallel": true,
"self": 0.0020168410001133452
}
}
},
"UnityEnvironment.step": {
"total": 0.05251872099961474,
"count": 1,
"is_parallel": true,
"self": 0.0005748259995925764,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000500588999784668,
"count": 1,
"is_parallel": true,
"self": 0.000500588999784668
},
"communicator.exchange": {
"total": 0.04960865200018816,
"count": 1,
"is_parallel": true,
"self": 0.04960865200018816
},
"steps_from_proto": {
"total": 0.0018346540000493405,
"count": 1,
"is_parallel": true,
"self": 0.00037448900047820644,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001460164999571134,
"count": 8,
"is_parallel": true,
"self": 0.001460164999571134
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1453.1576149919424,
"count": 64120,
"is_parallel": true,
"self": 33.292535639149264,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 23.774264530934033,
"count": 64120,
"is_parallel": true,
"self": 23.774264530934033
},
"communicator.exchange": {
"total": 1292.4545553009493,
"count": 64120,
"is_parallel": true,
"self": 1292.4545553009493
},
"steps_from_proto": {
"total": 103.63625952090979,
"count": 64120,
"is_parallel": true,
"self": 21.223096816928773,
"children": {
"_process_rank_one_or_two_observation": {
"total": 82.41316270398102,
"count": 512960,
"is_parallel": true,
"self": 82.41316270398102
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 667.7325540098864,
"count": 64121,
"self": 2.7623129258277004,
"children": {
"process_trajectory": {
"total": 112.92910790605902,
"count": 64121,
"self": 112.719067054059,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2100408520000201,
"count": 2,
"self": 0.2100408520000201
}
}
},
"_update_policy": {
"total": 552.0411331779997,
"count": 460,
"self": 353.8327037869749,
"children": {
"TorchPPOOptimizer.update": {
"total": 198.20842939102477,
"count": 22818,
"self": 198.20842939102477
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.1239999366807751e-06,
"count": 1,
"self": 1.1239999366807751e-06
},
"TrainerController._save_models": {
"total": 0.09794447199965362,
"count": 1,
"self": 0.0014649849990746588,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09647948700057896,
"count": 1,
"self": 0.09647948700057896
}
}
}
}
}
}
}