{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.21229027211666107,
"min": 0.2117043137550354,
"max": 0.45928406715393066,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 6348.328125,
"min": 6348.328125,
"max": 13844.6591796875,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 224.6124031007752,
"min": 224.6124031007752,
"max": 460.24285714285713,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 28975.0,
"min": 15241.0,
"max": 33246.0,
"count": 33
},
"Pyramids.Step.mean": {
"value": 1979990.0,
"min": 1019930.0,
"max": 1979990.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 1979990.0,
"min": 1019930.0,
"max": 1979990.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7928503751754761,
"min": 0.4259541630744934,
"max": 0.7928503751754761,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 236.2694091796875,
"min": 74.54197692871094,
"max": 236.2694091796875,
"count": 33
},
"Pyramids.Policy.CuriosityValueEstimate.mean": {
"value": 0.08150467276573181,
"min": 0.07122068107128143,
"max": 0.9572137594223022,
"count": 33
},
"Pyramids.Policy.CuriosityValueEstimate.sum": {
"value": 24.288393020629883,
"min": 20.867658615112305,
"max": 250.8644256591797,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7598759609945984,
"min": 1.3477056824735232,
"max": 1.7610719833374024,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 227.0239989683032,
"min": 70.8859993070364,
"max": 227.0239989683032,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7598759609945984,
"min": 1.3477056824735232,
"max": 1.7610719833374024,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 227.0239989683032,
"min": 70.8859993070364,
"max": 227.0239989683032,
"count": 33
},
"Pyramids.Policy.CuriosityReward.mean": {
"value": 0.1361309675361723,
"min": 0.13419253882020712,
"max": 8.166471420678981,
"count": 33
},
"Pyramids.Policy.CuriosityReward.sum": {
"value": 17.560894812166225,
"min": 16.77406735252589,
"max": 351.1582710891962,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06777143211054083,
"min": 0.06455791987166011,
"max": 0.0726872975951881,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9488000495475716,
"min": 0.6456835290203647,
"max": 1.0768589311046524,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.015221940508733193,
"min": 0.012300340361737955,
"max": 0.03165379860891133,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.21310716712226468,
"min": 0.17220476506433138,
"max": 0.28488418748020194,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 5.150198283299999e-06,
"min": 5.150198283299999e-06,
"max": 0.0001484157338614389,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 7.210277596619998e-05,
"min": 7.210277596619998e-05,
"max": 0.0020357597214137,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10171670000000001,
"min": 0.10171670000000001,
"max": 0.14947189444444448,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4240338000000001,
"min": 1.3452470500000002,
"max": 2.1785862999999996,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00018149832999999997,
"min": 0.00018149832999999997,
"max": 0.004952242254999999,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0025409766199999996,
"min": 0.0025409766199999996,
"max": 0.06794077137,
"count": 33
},
"Pyramids.Losses.CuriosityForwardLoss.mean": {
"value": 0.02933511003253183,
"min": 0.026961324524341357,
"max": 3.1736685284585864,
"count": 33
},
"Pyramids.Losses.CuriosityForwardLoss.sum": {
"value": 0.4106915404554456,
"min": 0.40441986786512035,
"max": 28.56301675612728,
"count": 33
},
"Pyramids.Losses.CuriosityInverseLoss.mean": {
"value": 0.029640588287293212,
"min": 0.029640588287293212,
"max": 0.3566509762571918,
"count": 33
},
"Pyramids.Losses.CuriosityInverseLoss.sum": {
"value": 0.414968236022105,
"min": 0.414968236022105,
"max": 3.209858786314726,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1763077443",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Pyramids.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids-Training --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.8.0+cu128",
"numpy_version": "1.23.5",
"end_time_seconds": "1763079927"
},
"total": 2483.756702148,
"count": 1,
"self": 0.4796895609997591,
"children": {
"run_training.setup": {
"total": 0.023456103000171424,
"count": 1,
"self": 0.023456103000171424
},
"TrainerController.start_learning": {
"total": 2483.253556484,
"count": 1,
"self": 1.293926397985615,
"children": {
"TrainerController._reset_env": {
"total": 2.227432239000109,
"count": 1,
"self": 2.227432239000109
},
"TrainerController.advance": {
"total": 2479.644286051014,
"count": 65357,
"self": 1.3399506230234692,
"children": {
"env_step": {
"total": 1728.4216181530269,
"count": 65357,
"self": 1585.745305189071,
"children": {
"SubprocessEnvManager._take_step": {
"total": 141.89125865498022,
"count": 65357,
"self": 4.438709465026932,
"children": {
"TorchPolicy.evaluate": {
"total": 137.4525491899533,
"count": 62564,
"self": 137.4525491899533
}
}
},
"workers": {
"total": 0.7850543089757593,
"count": 65357,
"self": 0.0,
"children": {
"worker_root": {
"total": 2477.0554484660784,
"count": 65357,
"is_parallel": true,
"self": 1005.134249613156,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0017356599998947786,
"count": 1,
"is_parallel": true,
"self": 0.0005625610001516179,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011730989997431607,
"count": 8,
"is_parallel": true,
"self": 0.0011730989997431607
}
}
},
"UnityEnvironment.step": {
"total": 0.08275230200001715,
"count": 1,
"is_parallel": true,
"self": 0.00253729899964128,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00039391300015267916,
"count": 1,
"is_parallel": true,
"self": 0.00039391300015267916
},
"communicator.exchange": {
"total": 0.07826742000020204,
"count": 1,
"is_parallel": true,
"self": 0.07826742000020204
},
"steps_from_proto": {
"total": 0.0015536700000211567,
"count": 1,
"is_parallel": true,
"self": 0.00034412000013617217,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012095499998849846,
"count": 8,
"is_parallel": true,
"self": 0.0012095499998849846
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1471.9211988529223,
"count": 65356,
"is_parallel": true,
"self": 33.12207003098706,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.54704261998154,
"count": 65356,
"is_parallel": true,
"self": 22.54704261998154
},
"communicator.exchange": {
"total": 1311.9614781070145,
"count": 65356,
"is_parallel": true,
"self": 1311.9614781070145
},
"steps_from_proto": {
"total": 104.29060809493922,
"count": 65356,
"is_parallel": true,
"self": 22.1295470098878,
"children": {
"_process_rank_one_or_two_observation": {
"total": 82.16106108505141,
"count": 522848,
"is_parallel": true,
"self": 82.16106108505141
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 749.8827172749636,
"count": 65357,
"self": 2.731102277001355,
"children": {
"process_trajectory": {
"total": 121.75977067996405,
"count": 65357,
"self": 121.5736909379641,
"children": {
"RLTrainer._checkpoint": {
"total": 0.18607974199994715,
"count": 2,
"self": 0.18607974199994715
}
}
},
"_update_policy": {
"total": 625.3918443179982,
"count": 473,
"self": 405.6678370609952,
"children": {
"TorchPPOOptimizer.update": {
"total": 219.724007257003,
"count": 22752,
"self": 219.724007257003
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.879997039912269e-07,
"count": 1,
"self": 8.879997039912269e-07
},
"TrainerController._save_models": {
"total": 0.0879109080001399,
"count": 1,
"self": 0.0014348859999699926,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0864760220001699,
"count": 1,
"self": 0.0864760220001699
}
}
}
}
}
}
}