{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.5329699516296387,
"min": 0.5329699516296387,
"max": 1.4730124473571777,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 15869.712890625,
"min": 15869.712890625,
"max": 44685.3046875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989925.0,
"min": 29995.0,
"max": 989925.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989925.0,
"min": 29995.0,
"max": 989925.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.32527124881744385,
"min": -0.09819067269563675,
"max": 0.47146105766296387,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 83.91998291015625,
"min": -23.663951873779297,
"max": 129.18032836914062,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.024481432512402534,
"min": -0.024481432512402534,
"max": 0.30700773000717163,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -6.31620979309082,
"min": -6.31620979309082,
"max": 73.06784057617188,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06402258796261785,
"min": 0.06402258796261785,
"max": 0.07285836355489059,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.8963162314766498,
"min": 0.5068387104435516,
"max": 1.0819015715769993,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.014020777585812199,
"min": 0.0004945200313595392,
"max": 0.01688305553907616,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.19629088620137078,
"min": 0.0064287604076740095,
"max": 0.23636277754706625,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.663468874114293e-06,
"min": 7.663468874114293e-06,
"max": 0.0002952365587306714,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0001072885642376001,
"min": 0.0001072885642376001,
"max": 0.003492923535692199,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10255445714285714,
"min": 0.10255445714285714,
"max": 0.1984121857142857,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4357624,
"min": 1.3888852999999999,
"max": 2.4859766000000003,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002651902685714289,
"min": 0.0002651902685714289,
"max": 0.009841377352857143,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0037126637600000045,
"min": 0.0037126637600000045,
"max": 0.11644434922,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.006034837570041418,
"min": 0.005967663135379553,
"max": 0.42052653431892395,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.08448772877454758,
"min": 0.08354728668928146,
"max": 2.94368577003479,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 471.6034482758621,
"min": 369.746835443038,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 27353.0,
"min": 16538.0,
"max": 33182.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.2179482425081318,
"min": -1.0000000521540642,
"max": 1.5036455543735359,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 70.64099806547165,
"min": -32.000001668930054,
"max": 118.78799879550934,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.2179482425081318,
"min": -1.0000000521540642,
"max": 1.5036455543735359,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 70.64099806547165,
"min": -32.000001668930054,
"max": 118.78799879550934,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.030007275076310055,
"min": 0.024380439864044208,
"max": 8.448245289132876,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 1.7404219544259831,
"min": 1.7404219544259831,
"max": 143.62016991525888,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1688797799",
"python_version": "3.10.12 (main, Jun 7 2023, 12:45:35) [GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1688799986"
},
"total": 2186.313541026,
"count": 1,
"self": 0.4897000010000738,
"children": {
"run_training.setup": {
"total": 0.04080620900003851,
"count": 1,
"self": 0.04080620900003851
},
"TrainerController.start_learning": {
"total": 2185.783034816,
"count": 1,
"self": 1.4199118919773355,
"children": {
"TrainerController._reset_env": {
"total": 4.284366712000008,
"count": 1,
"self": 4.284366712000008
},
"TrainerController.advance": {
"total": 2179.9744471570225,
"count": 63427,
"self": 1.5591114430030757,
"children": {
"env_step": {
"total": 1525.8184151120306,
"count": 63427,
"self": 1412.0890925360115,
"children": {
"SubprocessEnvManager._take_step": {
"total": 112.87707544801049,
"count": 63427,
"self": 4.832244222003965,
"children": {
"TorchPolicy.evaluate": {
"total": 108.04483122600652,
"count": 62553,
"self": 108.04483122600652
}
}
},
"workers": {
"total": 0.852247128008571,
"count": 63427,
"self": 0.0,
"children": {
"worker_root": {
"total": 2180.744330373029,
"count": 63427,
"is_parallel": true,
"self": 884.9899538890168,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00586359000004677,
"count": 1,
"is_parallel": true,
"self": 0.003860943000063344,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002002646999983426,
"count": 8,
"is_parallel": true,
"self": 0.002002646999983426
}
}
},
"UnityEnvironment.step": {
"total": 0.05123972199999116,
"count": 1,
"is_parallel": true,
"self": 0.0006805179999105349,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005460259999949812,
"count": 1,
"is_parallel": true,
"self": 0.0005460259999949812
},
"communicator.exchange": {
"total": 0.047802027000045655,
"count": 1,
"is_parallel": true,
"self": 0.047802027000045655
},
"steps_from_proto": {
"total": 0.0022111510000399903,
"count": 1,
"is_parallel": true,
"self": 0.0004609780000350838,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0017501730000049065,
"count": 8,
"is_parallel": true,
"self": 0.0017501730000049065
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1295.754376484012,
"count": 63426,
"is_parallel": true,
"self": 34.104387803033205,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.455942265036242,
"count": 63426,
"is_parallel": true,
"self": 22.455942265036242
},
"communicator.exchange": {
"total": 1134.6084352569733,
"count": 63426,
"is_parallel": true,
"self": 1134.6084352569733
},
"steps_from_proto": {
"total": 104.58561115896919,
"count": 63426,
"is_parallel": true,
"self": 20.158696654981156,
"children": {
"_process_rank_one_or_two_observation": {
"total": 84.42691450398803,
"count": 507408,
"is_parallel": true,
"self": 84.42691450398803
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 652.5969206019888,
"count": 63427,
"self": 2.69544794395415,
"children": {
"process_trajectory": {
"total": 109.69737310903525,
"count": 63427,
"self": 109.48599787503537,
"children": {
"RLTrainer._checkpoint": {
"total": 0.21137523399988822,
"count": 2,
"self": 0.21137523399988822
}
}
},
"_update_policy": {
"total": 540.2040995489995,
"count": 449,
"self": 348.2165695980092,
"children": {
"TorchPPOOptimizer.update": {
"total": 191.98752995099028,
"count": 22764,
"self": 191.98752995099028
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.27099974633893e-06,
"count": 1,
"self": 1.27099974633893e-06
},
"TrainerController._save_models": {
"total": 0.10430778400041163,
"count": 1,
"self": 0.0014450990006480424,
"children": {
"RLTrainer._checkpoint": {
"total": 0.10286268499976359,
"count": 1,
"self": 0.10286268499976359
}
}
}
}
}
}
}