{ "name": "root", "gauges": { "Pyramids.Policy.Entropy.mean": { "value": 0.31564950942993164, "min": 0.3051823079586029, "max": 1.4416756629943848, "count": 33 }, "Pyramids.Policy.Entropy.sum": { "value": 9343.2255859375, "min": 9218.947265625, "max": 43734.671875, "count": 33 }, "Pyramids.Step.mean": { "value": 989989.0, "min": 29952.0, "max": 989989.0, "count": 33 }, "Pyramids.Step.sum": { "value": 989989.0, "min": 29952.0, "max": 989989.0, "count": 33 }, "Pyramids.Policy.ExtrinsicValueEstimate.mean": { "value": 0.6356185674667358, "min": -0.09719794243574142, "max": 0.6356185674667358, "count": 33 }, "Pyramids.Policy.ExtrinsicValueEstimate.sum": { "value": 183.0581512451172, "min": -23.32750701904297, "max": 183.0581512451172, "count": 33 }, "Pyramids.Policy.RndValueEstimate.mean": { "value": 0.019015617668628693, "min": 0.011137646622955799, "max": 0.19304729998111725, "count": 33 }, "Pyramids.Policy.RndValueEstimate.sum": { "value": 5.476497650146484, "min": 3.1185410022735596, "max": 46.33135223388672, "count": 33 }, "Pyramids.Losses.PolicyLoss.mean": { "value": 0.07201716061118182, "min": 0.06651599989491136, "max": 0.07281037996429378, "count": 33 }, "Pyramids.Losses.PolicyLoss.sum": { "value": 1.0082402485565456, "min": 0.49753041101095064, "max": 1.0586281552872001, "count": 33 }, "Pyramids.Losses.ValueLoss.mean": { "value": 0.016889515849246438, "min": 0.0002569722005030127, "max": 0.017437762379995548, "count": 33 }, "Pyramids.Losses.ValueLoss.sum": { "value": 0.23645322188945014, "min": 0.00282669420553314, "max": 0.2615664356999332, "count": 33 }, "Pyramids.Policy.LearningRate.mean": { "value": 7.621533173807146e-06, "min": 7.621533173807146e-06, "max": 0.00029515063018788575, "count": 33 }, "Pyramids.Policy.LearningRate.sum": { "value": 0.00010670146443330004, "min": 0.00010670146443330004, "max": 0.0035079041306986993, "count": 33 }, "Pyramids.Policy.Epsilon.mean": { "value": 0.10254047857142858, "min": 0.10254047857142858, "max": 0.19838354285714285, "count": 33 }, "Pyramids.Policy.Epsilon.sum": { "value": 1.4355667, "min": 1.3691136000000002, "max": 2.5693013000000002, "count": 33 }, "Pyramids.Policy.Beta.mean": { "value": 0.00026379380928571447, "min": 0.00026379380928571447, "max": 0.00983851593142857, "count": 33 }, "Pyramids.Policy.Beta.sum": { "value": 0.003693113330000002, "min": 0.003693113330000002, "max": 0.11695319987, "count": 33 }, "Pyramids.Losses.RNDLoss.mean": { "value": 0.011276885867118835, "min": 0.011276885867118835, "max": 0.27956414222717285, "count": 33 }, "Pyramids.Losses.RNDLoss.sum": { "value": 0.1578764021396637, "min": 0.1578764021396637, "max": 1.95694899559021, "count": 33 }, "Pyramids.Environment.EpisodeLength.mean": { "value": 291.03, "min": 291.03, "max": 999.0, "count": 33 }, "Pyramids.Environment.EpisodeLength.sum": { "value": 29103.0, "min": 15984.0, "max": 32945.0, "count": 33 }, "Pyramids.Environment.CumulativeReward.mean": { "value": 1.6726222106754178, "min": -1.0000000521540642, "max": 1.6888265158144795, "count": 33 }, "Pyramids.Environment.CumulativeReward.sum": { "value": 165.58959885686636, "min": -32.000001668930054, "max": 174.69539833068848, "count": 33 }, "Pyramids.Policy.ExtrinsicReward.mean": { "value": 1.6726222106754178, "min": -1.0000000521540642, "max": 1.6888265158144795, "count": 33 }, "Pyramids.Policy.ExtrinsicReward.sum": { "value": 165.58959885686636, "min": -32.000001668930054, "max": 174.69539833068848, "count": 33 }, "Pyramids.Policy.RndReward.mean": { "value": 0.03288113874367984, "min": 0.03288113874367984, "max": 5.389898657798767, "count": 33 }, "Pyramids.Policy.RndReward.sum": { "value": 3.255232735624304, "min": 3.255232735624304, "max": 86.23837852478027, "count": 33 }, "Pyramids.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 33 }, "Pyramids.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 33 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1757547514", "python_version": "3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]", "command_line_arguments": "/home/ladong/Workspace/huggingface-rl/huggingface/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.8.0+cu128", "numpy_version": "1.23.5", "end_time_seconds": "1757548661" }, "total": 1146.3358237480006, "count": 1, "self": 0.3194760460010002, "children": { "run_training.setup": { "total": 0.02376967199961655, "count": 1, "self": 0.02376967199961655 }, "TrainerController.start_learning": { "total": 1145.99257803, "count": 1, "self": 0.9767570348940353, "children": { "TrainerController._reset_env": { "total": 1.8665626120000525, "count": 1, "self": 1.8665626120000525 }, "TrainerController.advance": { "total": 1143.0914149681048, "count": 63961, "self": 1.0213735208162689, "children": { "env_step": { "total": 718.6054868112333, "count": 63961, "self": 612.8003210057941, "children": { "SubprocessEnvManager._take_step": { "total": 105.1444154605615, "count": 63961, "self": 2.7217539872490306, "children": { "TorchPolicy.evaluate": { "total": 102.42266147331247, "count": 62547, "self": 102.42266147331247 } } }, "workers": { "total": 0.6607503448776697, "count": 63961, "self": 0.0, "children": { "worker_root": { "total": 1144.6578969987768, "count": 63961, "is_parallel": true, "self": 598.9709585549026, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.0011753800008591497, "count": 1, "is_parallel": true, "self": 0.00034511700505390763, "children": { "_process_rank_one_or_two_observation": { "total": 0.0008302629958052421, "count": 8, "is_parallel": true, "self": 0.0008302629958052421 } } }, "UnityEnvironment.step": { "total": 0.02318030900096346, "count": 1, "is_parallel": true, "self": 0.00021981999998388346, "children": { "UnityEnvironment._generate_step_input": { "total": 0.00019490500017127488, "count": 1, "is_parallel": true, "self": 0.00019490500017127488 }, "communicator.exchange": { "total": 0.022080583001297782, "count": 1, "is_parallel": true, "self": 0.022080583001297782 }, "steps_from_proto": { "total": 0.0006850009995105211, "count": 1, "is_parallel": true, "self": 0.00018278799871040974, "children": { "_process_rank_one_or_two_observation": { "total": 0.0005022130008001113, "count": 8, "is_parallel": true, "self": 0.0005022130008001113 } } } } } } }, "UnityEnvironment.step": { "total": 545.6869384438742, "count": 63960, "is_parallel": true, "self": 15.305896251862578, "children": { "UnityEnvironment._generate_step_input": { "total": 10.50695718704992, "count": 63960, "is_parallel": true, "self": 10.50695718704992 }, "communicator.exchange": { "total": 475.508802784796, "count": 63960, "is_parallel": true, "self": 475.508802784796 }, "steps_from_proto": { "total": 44.36528222016568, "count": 63960, "is_parallel": true, "self": 9.725696526184038, "children": { "_process_rank_one_or_two_observation": { "total": 34.639585693981644, "count": 511680, "is_parallel": true, "self": 34.639585693981644 } } } } } } } } } } }, "trainer_advance": { "total": 423.46455463605525, "count": 63961, "self": 1.6934744032205344, "children": { "process_trajectory": { "total": 78.20843168784086, "count": 63961, "self": 78.07713529884131, "children": { "RLTrainer._checkpoint": { "total": 0.13129638899954443, "count": 2, "self": 0.13129638899954443 } } }, "_update_policy": { "total": 343.56264854499386, "count": 447, "self": 187.90794703476058, "children": { "TorchPPOOptimizer.update": { "total": 155.65470151023328, "count": 22812, "self": 155.65470151023328 } } } } } } }, "trainer_threads": { "total": 7.210001058410853e-07, "count": 1, "self": 7.210001058410853e-07 }, "TrainerController._save_models": { "total": 0.057842694001010386, "count": 1, "self": 0.0008298360007756855, "children": { "RLTrainer._checkpoint": { "total": 0.0570128580002347, "count": 1, "self": 0.0570128580002347 } } } } } } }