{ "name": "root", "gauges": { "Pyramids.Policy.Entropy.mean": { "value": 0.4246387779712677, "min": 0.4246387779712677, "max": 1.4456040859222412, "count": 33 }, "Pyramids.Policy.Entropy.sum": { "value": 12773.134765625, "min": 12773.134765625, "max": 43853.84375, "count": 33 }, "Pyramids.Step.mean": { "value": 989926.0, "min": 29952.0, "max": 989926.0, "count": 33 }, "Pyramids.Step.sum": { "value": 989926.0, "min": 29952.0, "max": 989926.0, "count": 33 }, "Pyramids.Policy.ExtrinsicValueEstimate.mean": { "value": 0.5708710551261902, "min": -0.061649616807699203, "max": 0.6232691407203674, "count": 33 }, "Pyramids.Policy.ExtrinsicValueEstimate.sum": { "value": 157.56040954589844, "min": -14.980856895446777, "max": 177.00843811035156, "count": 33 }, "Pyramids.Policy.RndValueEstimate.mean": { "value": -0.04511358216404915, "min": -0.04511358216404915, "max": 0.7476927042007446, "count": 33 }, "Pyramids.Policy.RndValueEstimate.sum": { "value": -12.451348304748535, "min": -12.451348304748535, "max": 177.2031707763672, "count": 33 }, "Pyramids.Losses.PolicyLoss.mean": { "value": 0.06698757979708413, "min": 0.06565299234630186, "max": 0.07363895688415457, "count": 33 }, "Pyramids.Losses.PolicyLoss.sum": { "value": 1.004813696956262, "min": 0.5150413518054303, "max": 1.0925647304781403, "count": 33 }, "Pyramids.Losses.ValueLoss.mean": { "value": 0.017095749339794897, "min": 0.0012846800181240889, "max": 0.02962426979588816, "count": 33 }, "Pyramids.Losses.ValueLoss.sum": { "value": 0.25643624009692345, "min": 0.012846800181240889, "max": 0.25643624009692345, "count": 33 }, "Pyramids.Policy.LearningRate.mean": { "value": 7.552817482426668e-06, "min": 7.552817482426668e-06, "max": 0.00029515063018788575, "count": 33 }, "Pyramids.Policy.LearningRate.sum": { "value": 0.00011329226223640002, "min": 0.00011329226223640002, "max": 0.0036347473884176, "count": 33 }, "Pyramids.Policy.Epsilon.mean": { "value": 0.10251757333333332, "min": 0.10251757333333332, "max": 0.19838354285714285, "count": 33 }, "Pyramids.Policy.Epsilon.sum": { "value": 1.5377636, "min": 1.3886848, "max": 2.6623066, "count": 33 }, "Pyramids.Policy.Beta.mean": { "value": 0.000261505576, "min": 0.000261505576, "max": 0.00983851593142857, "count": 33 }, "Pyramids.Policy.Beta.sum": { "value": 0.00392258364, "min": 0.00392258364, "max": 0.12117708176000001, "count": 33 }, "Pyramids.Losses.RNDLoss.mean": { "value": 0.014357751235365868, "min": 0.014245187863707542, "max": 0.8657411932945251, "count": 33 }, "Pyramids.Losses.RNDLoss.sum": { "value": 0.21536627411842346, "min": 0.1994326263666153, "max": 6.060188293457031, "count": 33 }, "Pyramids.Environment.EpisodeLength.mean": { "value": 320.40425531914894, "min": 298.7755102040816, "max": 999.0, "count": 33 }, "Pyramids.Environment.EpisodeLength.sum": { "value": 30118.0, "min": 15984.0, "max": 32950.0, "count": 33 }, "Pyramids.Environment.CumulativeReward.mean": { "value": 1.6571139593919118, "min": -1.0000000521540642, "max": 1.6826123519963825, "count": 33 }, "Pyramids.Environment.CumulativeReward.sum": { "value": 154.1115982234478, "min": -30.753001734614372, "max": 163.2133981436491, "count": 33 }, "Pyramids.Policy.ExtrinsicReward.mean": { "value": 1.6571139593919118, "min": -1.0000000521540642, "max": 1.6826123519963825, "count": 33 }, "Pyramids.Policy.ExtrinsicReward.sum": { "value": 154.1115982234478, "min": -30.753001734614372, "max": 163.2133981436491, "count": 33 }, "Pyramids.Policy.RndReward.mean": { "value": 0.04798846740353232, "min": 0.044617075882173986, "max": 19.5529414601624, "count": 33 }, "Pyramids.Policy.RndReward.sum": { "value": 4.462927468528505, "min": 4.205655123165343, "max": 312.8470633625984, "count": 33 }, "Pyramids.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 33 }, "Pyramids.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 33 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1768143178", "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", "command_line_arguments": "/home/ruedi/miniconda3/envs/deep-rl-unit5/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.7.1+cu118", "numpy_version": "1.23.5", "end_time_seconds": "1768144877" }, "total": 1699.4481448590013, "count": 1, "self": 0.32161057400298887, "children": { "run_training.setup": { "total": 0.0265691769982368, "count": 1, "self": 0.0265691769982368 }, "TrainerController.start_learning": { "total": 1699.099965108, "count": 1, "self": 1.0126069420657586, "children": { "TrainerController._reset_env": { "total": 2.489544700998522, "count": 1, "self": 2.489544700998522 }, "TrainerController.advance": { "total": 1695.5144028799386, "count": 64098, "self": 1.0758791520038358, "children": { "env_step": { "total": 1034.170596157328, "count": 64098, "self": 900.9851592079485, "children": { "SubprocessEnvManager._take_step": { "total": 132.5512001971092, "count": 64098, "self": 3.4022758224764402, "children": { "TorchPolicy.evaluate": { "total": 129.14892437463277, "count": 62552, "self": 129.14892437463277 } } }, "workers": { "total": 0.6342367522702261, "count": 64098, "self": 0.0, "children": { "worker_root": { "total": 1696.8730614146843, "count": 64098, "is_parallel": true, "self": 883.3339461736832, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.002434030999211245, "count": 1, "is_parallel": true, "self": 0.0005315310008882079, "children": { "_process_rank_one_or_two_observation": { "total": 0.0019024999983230373, "count": 8, "is_parallel": true, "self": 0.0019024999983230373 } } }, "UnityEnvironment.step": { "total": 0.04527939900071942, "count": 1, "is_parallel": true, "self": 0.0008197250008379342, "children": { "UnityEnvironment._generate_step_input": { "total": 0.00027776099886978045, "count": 1, "is_parallel": true, "self": 0.00027776099886978045 }, "communicator.exchange": { "total": 0.042582305999530945, "count": 1, "is_parallel": true, "self": 0.042582305999530945 }, "steps_from_proto": { "total": 0.0015996070014807628, "count": 1, "is_parallel": true, "self": 0.00027805700119643006, "children": { "_process_rank_one_or_two_observation": { "total": 0.0013215500002843328, "count": 8, "is_parallel": true, "self": 0.0013215500002843328 } } } } } } }, "UnityEnvironment.step": { "total": 813.5391152410011, "count": 64097, "is_parallel": true, "self": 29.240099724700485, "children": { "UnityEnvironment._generate_step_input": { "total": 20.409567403163237, "count": 64097, "is_parallel": true, "self": 20.409567403163237 }, "communicator.exchange": { "total": 674.0780032838593, "count": 64097, "is_parallel": true, "self": 674.0780032838593 }, "steps_from_proto": { "total": 89.81144482927812, "count": 64097, "is_parallel": true, "self": 19.6907945834173, "children": { "_process_rank_one_or_two_observation": { "total": 70.12065024586082, "count": 512776, "is_parallel": true, "self": 70.12065024586082 } } } } } } } } } } }, "trainer_advance": { "total": 660.2679275706068, "count": 64098, "self": 1.7409405515463732, "children": { "process_trajectory": { "total": 109.60059068708506, "count": 64098, "self": 109.42354976508796, "children": { "RLTrainer._checkpoint": { "total": 0.1770409219971043, "count": 2, "self": 0.1770409219971043 } } }, "_update_policy": { "total": 548.9263963319754, "count": 453, "self": 265.25873213222076, "children": { "TorchPPOOptimizer.update": { "total": 283.66766419975465, "count": 22752, "self": 283.66766419975465 } } } } } } }, "trainer_threads": { "total": 8.049973985180259e-07, "count": 1, "self": 8.049973985180259e-07 }, "TrainerController._save_models": { "total": 0.0834097799997835, "count": 1, "self": 0.0009502159991825465, "children": { "RLTrainer._checkpoint": { "total": 0.08245956400060095, "count": 1, "self": 0.08245956400060095 } } } } } } }