{ "name": "root", "gauges": { "SnowballTarget.Policy.Entropy.mean": { "value": 0.9937470555305481, "min": 0.9932188391685486, "max": 2.865901231765747, "count": 26 }, "SnowballTarget.Policy.Entropy.sum": { "value": 10220.6884765625, "min": 9505.1044921875, "max": 29475.79296875, "count": 26 }, "SnowballTarget.Step.mean": { "value": 259984.0, "min": 9952.0, "max": 259984.0, "count": 26 }, "SnowballTarget.Step.sum": { "value": 259984.0, "min": 9952.0, "max": 259984.0, "count": 26 }, "SnowballTarget.Policy.ExtrinsicValueEstimate.mean": { "value": 13.172257423400879, "min": 0.3891133666038513, "max": 13.172257423400879, "count": 26 }, "SnowballTarget.Policy.ExtrinsicValueEstimate.sum": { "value": 2700.312744140625, "min": 75.48799133300781, "max": 2700.312744140625, "count": 26 }, "SnowballTarget.Environment.EpisodeLength.mean": { "value": 199.0, "min": 199.0, "max": 199.0, "count": 26 }, "SnowballTarget.Environment.EpisodeLength.sum": { "value": 10945.0, "min": 8756.0, "max": 10945.0, "count": 26 }, "SnowballTarget.Losses.PolicyLoss.mean": { "value": 0.07080489360206925, "min": 0.06207365351164138, "max": 0.0747272557803537, "count": 26 }, "SnowballTarget.Losses.PolicyLoss.sum": { "value": 0.35402446801034626, "min": 0.24829461404656553, "max": 0.37363627890176854, "count": 26 }, "SnowballTarget.Losses.ValueLoss.mean": { "value": 0.20982883528751484, "min": 0.10992890026411223, "max": 0.28365658201715527, "count": 26 }, "SnowballTarget.Losses.ValueLoss.sum": { "value": 1.0491441764375742, "min": 0.4397156010564489, "max": 1.3633389063909942, "count": 26 }, "SnowballTarget.Policy.LearningRate.mean": { "value": 8.047582669067381e-06, "min": 8.047582669067381e-06, "max": 0.0002938064595840454, "count": 26 }, "SnowballTarget.Policy.LearningRate.sum": { "value": 4.02379133453369e-05, "min": 4.02379133453369e-05, "max": 0.0014123840624084472, "count": 26 }, "SnowballTarget.Policy.Epsilon.mean": { "value": 0.10268249511718752, "min": 0.10268249511718752, "max": 0.19793548583984377, "count": 26 }, "SnowballTarget.Policy.Epsilon.sum": { "value": 0.5134124755859376, "min": 0.4258361816406251, "max": 0.9707946777343752, "count": 26 }, "SnowballTarget.Policy.Beta.mean": { "value": 0.00014385650634765628, "min": 0.00014385650634765628, "max": 0.0048969807434082035, "count": 26 }, "SnowballTarget.Policy.Beta.sum": { "value": 0.0007192825317382814, "min": 0.0007192825317382814, "max": 0.023542654418945312, "count": 26 }, "SnowballTarget.Environment.CumulativeReward.mean": { "value": 26.09090909090909, "min": 2.9545454545454546, "max": 26.09090909090909, "count": 26 }, "SnowballTarget.Environment.CumulativeReward.sum": { "value": 1435.0, "min": 130.0, "max": 1435.0, "count": 26 }, "SnowballTarget.Policy.ExtrinsicReward.mean": { "value": 26.09090909090909, "min": 2.9545454545454546, "max": 26.09090909090909, "count": 26 }, "SnowballTarget.Policy.ExtrinsicReward.sum": { "value": 1435.0, "min": 130.0, "max": 1435.0, "count": 26 }, "SnowballTarget.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 26 }, "SnowballTarget.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 26 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1738523612", "python_version": "3.10.16 (main, Dec 4 2024, 08:53:38) [GCC 13.2.0]", "command_line_arguments": "/home/hrichter/projects/hf_rl_course/.venv/bin/mlagents-learn snowball_target.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --force", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.5.1+cu124", "numpy_version": "1.23.5", "end_time_seconds": "1738523832" }, "total": 220.21882521600003, "count": 1, "self": 0.16743964799991318, "children": { "run_training.setup": { "total": 0.01430707200006509, "count": 1, "self": 0.01430707200006509 }, "TrainerController.start_learning": { "total": 220.03707849600005, "count": 1, "self": 0.29678710600796876, "children": { "TrainerController._reset_env": { "total": 0.9881883109999308, "count": 1, "self": 0.9881883109999308 }, "TrainerController.advance": { "total": 218.71728829899212, "count": 23870, "self": 0.13877166698523524, "children": { "env_step": { "total": 218.57851663200688, "count": 23870, "self": 170.48863598802143, "children": { "SubprocessEnvManager._take_step": { "total": 47.94756175899306, "count": 23870, "self": 0.733845374003181, "children": { "TorchPolicy.evaluate": { "total": 47.21371638498988, "count": 23870, "self": 47.21371638498988 } } }, "workers": { "total": 0.14231888499239176, "count": 23870, "self": 0.0, "children": { "worker_root": { "total": 219.6435308600096, "count": 23870, "is_parallel": true, "self": 101.76140652001902, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.001100930999996308, "count": 1, "is_parallel": true, "self": 0.0002678159997913099, "children": { "_process_rank_one_or_two_observation": { "total": 0.0008331150002049981, "count": 10, "is_parallel": true, "self": 0.0008331150002049981 } } }, "UnityEnvironment.step": { "total": 0.014656923999950777, "count": 1, "is_parallel": true, "self": 0.00024516799999219074, "children": { "UnityEnvironment._generate_step_input": { "total": 0.00017713599993385287, "count": 1, "is_parallel": true, "self": 0.00017713599993385287 }, "communicator.exchange": { "total": 0.013529447999985678, "count": 1, "is_parallel": true, "self": 0.013529447999985678 }, "steps_from_proto": { "total": 0.000705172000039056, "count": 1, "is_parallel": true, "self": 0.00014282500001172593, "children": { "_process_rank_one_or_two_observation": { "total": 0.0005623470000273301, "count": 10, "is_parallel": true, "self": 0.0005623470000273301 } } } } } } }, "UnityEnvironment.step": { "total": 117.88212433999058, "count": 23869, "is_parallel": true, "self": 4.965385717998402, "children": { "UnityEnvironment._generate_step_input": { "total": 2.7094940289965734, "count": 23869, "is_parallel": true, "self": 2.7094940289965734 }, "communicator.exchange": { "total": 94.92899641999452, "count": 23869, "is_parallel": true, "self": 94.92899641999452 }, "steps_from_proto": { "total": 15.278248173001089, "count": 23869, "is_parallel": true, "self": 2.901951909994409, "children": { "_process_rank_one_or_two_observation": { "total": 12.37629626300668, "count": 238690, "is_parallel": true, "self": 12.37629626300668 } } } } } } } } } } } } }, "trainer_threads": { "total": 0.00013065600001027633, "count": 1, "self": 0.00013065600001027633, "children": { "thread_root": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "trainer_advance": { "total": 218.1420525019978, "count": 267456, "is_parallel": true, "self": 2.049699340002917, "children": { "process_trajectory": { "total": 118.50296957099522, "count": 267456, "is_parallel": true, "self": 118.14739669499545, "children": { "RLTrainer._checkpoint": { "total": 0.35557287599976917, "count": 5, "is_parallel": true, "self": 0.35557287599976917 } } }, "_update_policy": { "total": 97.58938359099966, "count": 119, "is_parallel": true, "self": 26.549358390000293, "children": { "TorchPPOOptimizer.update": { "total": 71.04002520099937, "count": 6066, "is_parallel": true, "self": 71.04002520099937 } } } } } } } } }, "TrainerController._save_models": { "total": 0.034684124000023075, "count": 1, "self": 0.0007391590000906945, "children": { "RLTrainer._checkpoint": { "total": 0.03394496499993238, "count": 1, "self": 0.03394496499993238 } } } } } } }