{ "name": "root", "gauges": { "Pyramids.Policy.Entropy.mean": { "value": 0.4389995336532593, "min": 0.42045554518699646, "max": 1.4630111455917358, "count": 33 }, "Pyramids.Policy.Entropy.sum": { "value": 13092.7216796875, "min": 12553.12109375, "max": 44381.90625, "count": 33 }, "Pyramids.Step.mean": { "value": 989883.0, "min": 29952.0, "max": 989883.0, "count": 33 }, "Pyramids.Step.sum": { "value": 989883.0, "min": 29952.0, "max": 989883.0, "count": 33 }, "Pyramids.Policy.ExtrinsicValueEstimate.mean": { "value": 0.49429410696029663, "min": -0.08135484904050827, "max": 0.49429410696029663, "count": 33 }, "Pyramids.Policy.ExtrinsicValueEstimate.sum": { "value": 135.93087768554688, "min": -19.606517791748047, "max": 135.93087768554688, "count": 33 }, "Pyramids.Policy.RndValueEstimate.mean": { "value": 0.024275707080960274, "min": -0.09598658233880997, "max": 0.365374892950058, "count": 33 }, "Pyramids.Policy.RndValueEstimate.sum": { "value": 6.675819396972656, "min": -25.53243064880371, "max": 87.68997192382812, "count": 33 }, "Pyramids.Losses.PolicyLoss.mean": { "value": 0.0690484138654678, "min": 0.0657107886040063, "max": 0.07309162523628276, "count": 33 }, "Pyramids.Losses.PolicyLoss.sum": { "value": 0.9666777941165491, "min": 0.4727661243162183, "max": 1.060646082698048, "count": 33 }, "Pyramids.Losses.ValueLoss.mean": { "value": 0.01574416473712994, "min": 0.0009589710182693636, "max": 0.015765055310718406, "count": 33 }, "Pyramids.Losses.ValueLoss.sum": { "value": 0.22041830631981912, "min": 0.006712797127885545, "max": 0.22071077435005768, "count": 33 }, "Pyramids.Policy.LearningRate.mean": { "value": 7.625218886864282e-06, "min": 7.625218886864282e-06, "max": 0.00029515063018788575, "count": 33 }, "Pyramids.Policy.LearningRate.sum": { "value": 0.00010675306441609995, "min": 0.00010675306441609995, "max": 0.0035081951306017, "count": 33 }, "Pyramids.Policy.Epsilon.mean": { "value": 0.10254170714285717, "min": 0.10254170714285717, "max": 0.19838354285714285, "count": 33 }, "Pyramids.Policy.Epsilon.sum": { "value": 1.4355839000000004, "min": 1.3691136000000002, "max": 2.5693983000000005, "count": 33 }, "Pyramids.Policy.Beta.mean": { "value": 0.00026391654357142854, "min": 0.00026391654357142854, "max": 0.00983851593142857, "count": 33 }, "Pyramids.Policy.Beta.sum": { "value": 0.0036948316099999993, "min": 0.0036948316099999993, "max": 0.11696289016999999, "count": 33 }, "Pyramids.Losses.RNDLoss.mean": { "value": 0.01384345255792141, "min": 0.01384345255792141, "max": 0.513745129108429, "count": 33 }, "Pyramids.Losses.RNDLoss.sum": { "value": 0.19380833208560944, "min": 0.19380833208560944, "max": 3.5962159633636475, "count": 33 }, "Pyramids.Environment.EpisodeLength.mean": { "value": 371.6875, "min": 371.6875, "max": 999.0, "count": 33 }, "Pyramids.Environment.EpisodeLength.sum": { "value": 29735.0, "min": 15984.0, "max": 33160.0, "count": 33 }, "Pyramids.Environment.CumulativeReward.mean": { "value": 1.553294979315251, "min": -1.0000000521540642, "max": 1.553294979315251, "count": 33 }, "Pyramids.Environment.CumulativeReward.sum": { "value": 124.26359834522009, "min": -32.000001668930054, "max": 124.26359834522009, "count": 33 }, "Pyramids.Policy.ExtrinsicReward.mean": { "value": 1.553294979315251, "min": -1.0000000521540642, "max": 1.553294979315251, "count": 33 }, "Pyramids.Policy.ExtrinsicReward.sum": { "value": 124.26359834522009, "min": -32.000001668930054, "max": 124.26359834522009, "count": 33 }, "Pyramids.Policy.RndReward.mean": { "value": 0.053356974821872426, "min": 0.053356974821872426, "max": 10.283461756072938, "count": 33 }, "Pyramids.Policy.RndReward.sum": { "value": 4.268557985749794, "min": 4.268557985749794, "max": 164.53538809716702, "count": 33 }, "Pyramids.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 33 }, "Pyramids.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 33 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1758184192", "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", "command_line_arguments": "/home/yuming/miniconda3/envs/rl_ml_agent/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.8.0+cu128", "numpy_version": "1.23.5", "end_time_seconds": "1758185250" }, "total": 1058.0915854540071, "count": 1, "self": 0.26861677900888026, "children": { "run_training.setup": { "total": 0.01250700099626556, "count": 1, "self": 0.01250700099626556 }, "TrainerController.start_learning": { "total": 1057.810461674002, "count": 1, "self": 0.9363300465920474, "children": { "TrainerController._reset_env": { "total": 1.5959005370095838, "count": 1, "self": 1.5959005370095838 }, "TrainerController.advance": { "total": 1055.2140239283908, "count": 63647, "self": 0.8749976060935296, "children": { "env_step": { "total": 610.2634240581538, "count": 63647, "self": 483.5795742230257, "children": { "SubprocessEnvManager._take_step": { "total": 126.10740250276285, "count": 63647, "self": 2.6507814017822966, "children": { "TorchPolicy.evaluate": { "total": 123.45662110098056, "count": 62561, "self": 123.45662110098056 } } }, "workers": { "total": 0.5764473323652055, "count": 63647, "self": 0.0, "children": { "worker_root": { "total": 1056.5960486974218, "count": 63647, "is_parallel": true, "self": 631.5242763401184, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.0009857149852905422, "count": 1, "is_parallel": true, "self": 0.00032684794859960675, "children": { "_process_rank_one_or_two_observation": { "total": 0.0006588670366909355, "count": 8, "is_parallel": true, "self": 0.0006588670366909355 } } }, "UnityEnvironment.step": { "total": 0.020267093990696594, "count": 1, "is_parallel": true, "self": 0.00020054500782862306, "children": { "UnityEnvironment._generate_step_input": { "total": 0.00017034998745657504, "count": 1, "is_parallel": true, "self": 0.00017034998745657504 }, "communicator.exchange": { "total": 0.019269573997007683, "count": 1, "is_parallel": true, "self": 0.019269573997007683 }, "steps_from_proto": { "total": 0.0006266249984037131, "count": 1, "is_parallel": true, "self": 0.0001542279205750674, "children": { "_process_rank_one_or_two_observation": { "total": 0.0004723970778286457, "count": 8, "is_parallel": true, "self": 0.0004723970778286457 } } } } } } }, "UnityEnvironment.step": { "total": 425.0717723573034, "count": 63646, "is_parallel": true, "self": 12.706232581025688, "children": { "UnityEnvironment._generate_step_input": { "total": 9.13223309617024, "count": 63646, "is_parallel": true, "self": 9.13223309617024 }, "communicator.exchange": { "total": 364.6950462360692, "count": 63646, "is_parallel": true, "self": 364.6950462360692 }, "steps_from_proto": { "total": 38.538260444038315, "count": 63646, "is_parallel": true, "self": 9.022620393137913, "children": { "_process_rank_one_or_two_observation": { "total": 29.5156400509004, "count": 509168, "is_parallel": true, "self": 29.5156400509004 } } } } } } } } } } }, "trainer_advance": { "total": 444.0756022641435, "count": 63647, "self": 1.531417848658748, "children": { "process_trajectory": { "total": 82.66186084484798, "count": 63647, "self": 82.53185816685436, "children": { "RLTrainer._checkpoint": { "total": 0.1300026779936161, "count": 2, "self": 0.1300026779936161 } } }, "_update_policy": { "total": 359.8823235706368, "count": 447, "self": 189.6681899887917, "children": { "TorchPPOOptimizer.update": { "total": 170.2141335818451, "count": 22779, "self": 170.2141335818451 } } } } } } }, "trainer_threads": { "total": 9.960203897207975e-07, "count": 1, "self": 9.960203897207975e-07 }, "TrainerController._save_models": { "total": 0.0642061659891624, "count": 1, "self": 0.0006714320043101907, "children": { "RLTrainer._checkpoint": { "total": 0.06353473398485221, "count": 1, "self": 0.06353473398485221 } } } } } } }