emylrahim's picture
First Pyramids RND Training
8311b08
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.33837178349494934,
"min": 0.3371018171310425,
"max": 1.4698516130447388,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 10086.1865234375,
"min": 10086.1865234375,
"max": 44589.41796875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989885.0,
"min": 29952.0,
"max": 989885.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989885.0,
"min": 29952.0,
"max": 989885.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.6027913689613342,
"min": -0.09419212490320206,
"max": 0.6027913689613342,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 168.78158569335938,
"min": -22.700302124023438,
"max": 168.78158569335938,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.019413989037275314,
"min": 0.0039646499790251255,
"max": 0.3826637864112854,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 5.435916900634766,
"min": 1.086314082145691,
"max": 90.69131469726562,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06906098967471228,
"min": 0.06314192169932233,
"max": 0.07364952112041792,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9668538554459719,
"min": 0.4936774280703367,
"max": 1.056691900555355,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01590924158986496,
"min": 0.0004320695348182958,
"max": 0.01785214621313138,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.22272938225810945,
"min": 0.005616903952637846,
"max": 0.26013686064420743,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.545504627721427e-06,
"min": 7.545504627721427e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010563706478809998,
"min": 0.00010563706478809998,
"max": 0.0035064302311899995,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10251513571428572,
"min": 0.10251513571428572,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4352119,
"min": 1.3886848,
"max": 2.5725451,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00026126205785714286,
"min": 0.00026126205785714286,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.00365766881,
"min": 0.00365766881,
"max": 0.11690411899999999,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.013215035200119019,
"min": 0.013215035200119019,
"max": 0.5090944766998291,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.18501049280166626,
"min": 0.18501049280166626,
"max": 3.5636613368988037,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 299.80898876404495,
"min": 299.80898876404495,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 26683.0,
"min": 15984.0,
"max": 33129.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.655231446697471,
"min": -1.0000000521540642,
"max": 1.655231446697471,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 147.3155987560749,
"min": -31.992401644587517,
"max": 165.16939850896597,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.655231446697471,
"min": -1.0000000521540642,
"max": 1.655231446697471,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 147.3155987560749,
"min": -31.992401644587517,
"max": 165.16939850896597,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.040885104842907805,
"min": 0.040885104842907805,
"max": 10.291501241736114,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.6387743310187943,
"min": 3.6387743310187943,
"max": 164.66401986777782,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1677997174",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.22.4",
"end_time_seconds": "1677999360"
},
"total": 2186.230609959,
"count": 1,
"self": 0.4435650620002889,
"children": {
"run_training.setup": {
"total": 0.17320095799999535,
"count": 1,
"self": 0.17320095799999535
},
"TrainerController.start_learning": {
"total": 2185.6138439389997,
"count": 1,
"self": 1.247823589998461,
"children": {
"TrainerController._reset_env": {
"total": 9.768474657000013,
"count": 1,
"self": 9.768474657000013
},
"TrainerController.advance": {
"total": 2174.5100418900015,
"count": 63863,
"self": 1.3504252120055753,
"children": {
"env_step": {
"total": 1449.0138763280095,
"count": 63863,
"self": 1339.8210295609988,
"children": {
"SubprocessEnvManager._take_step": {
"total": 108.41860024196461,
"count": 63863,
"self": 4.562224165961084,
"children": {
"TorchPolicy.evaluate": {
"total": 103.85637607600353,
"count": 62560,
"self": 35.23890970399117,
"children": {
"TorchPolicy.sample_actions": {
"total": 68.61746637201236,
"count": 62560,
"self": 68.61746637201236
}
}
}
}
},
"workers": {
"total": 0.7742465250461237,
"count": 63863,
"self": 0.0,
"children": {
"worker_root": {
"total": 2181.316357008027,
"count": 63863,
"is_parallel": true,
"self": 952.8709664680625,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.007113193999998657,
"count": 1,
"is_parallel": true,
"self": 0.005350957000018752,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0017622369999799048,
"count": 8,
"is_parallel": true,
"self": 0.0017622369999799048
}
}
},
"UnityEnvironment.step": {
"total": 0.04697200300000759,
"count": 1,
"is_parallel": true,
"self": 0.00054206999999451,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004795720000174697,
"count": 1,
"is_parallel": true,
"self": 0.0004795720000174697
},
"communicator.exchange": {
"total": 0.04440027499998678,
"count": 1,
"is_parallel": true,
"self": 0.04440027499998678
},
"steps_from_proto": {
"total": 0.0015500860000088323,
"count": 1,
"is_parallel": true,
"self": 0.000382766999962314,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011673190000465183,
"count": 8,
"is_parallel": true,
"self": 0.0011673190000465183
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1228.4453905399644,
"count": 63862,
"is_parallel": true,
"self": 30.787468635005325,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.257678546959482,
"count": 63862,
"is_parallel": true,
"self": 22.257678546959482
},
"communicator.exchange": {
"total": 1086.916001399989,
"count": 63862,
"is_parallel": true,
"self": 1086.916001399989
},
"steps_from_proto": {
"total": 88.48424195801039,
"count": 63862,
"is_parallel": true,
"self": 20.705275710953742,
"children": {
"_process_rank_one_or_two_observation": {
"total": 67.77896624705664,
"count": 510896,
"is_parallel": true,
"self": 67.77896624705664
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 724.1457403499865,
"count": 63863,
"self": 2.512960108000925,
"children": {
"process_trajectory": {
"total": 159.37191239898567,
"count": 63863,
"self": 159.1650898319852,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2068225670004722,
"count": 2,
"self": 0.2068225670004722
}
}
},
"_update_policy": {
"total": 562.2608678429998,
"count": 453,
"self": 217.50272035598647,
"children": {
"TorchPPOOptimizer.update": {
"total": 344.75814748701333,
"count": 22785,
"self": 344.75814748701333
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.550000529794488e-07,
"count": 1,
"self": 8.550000529794488e-07
},
"TrainerController._save_models": {
"total": 0.08750294699984806,
"count": 1,
"self": 0.001381342000058794,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08612160499978927,
"count": 1,
"self": 0.08612160499978927
}
}
}
}
}
}
}