{ "name": "root", "gauges": { "SnowballTarget.Policy.Entropy.mean": { "value": 0.85108882188797, "min": 0.85108882188797, "max": 2.8349575996398926, "count": 20 }, "SnowballTarget.Policy.Entropy.sum": { "value": 8088.748046875, "min": 8088.748046875, "max": 28939.248046875, "count": 20 }, "SnowballTarget.Step.mean": { "value": 199984.0, "min": 9952.0, "max": 199984.0, "count": 20 }, "SnowballTarget.Step.sum": { "value": 199984.0, "min": 9952.0, "max": 199984.0, "count": 20 }, "SnowballTarget.Policy.ExtrinsicValueEstimate.mean": { "value": 13.171271324157715, "min": 0.4332387149333954, "max": 13.171271324157715, "count": 20 }, "SnowballTarget.Policy.ExtrinsicValueEstimate.sum": { "value": 2568.39794921875, "min": 84.04830932617188, "max": 2663.498046875, "count": 20 }, "SnowballTarget.Losses.PolicyLoss.mean": { "value": 0.061033688282038256, "min": 0.061033688282038256, "max": 0.07578373484954914, "count": 20 }, "SnowballTarget.Losses.PolicyLoss.sum": { "value": 0.24413475312815303, "min": 0.24413475312815303, "max": 0.3488958057520601, "count": 20 }, "SnowballTarget.Losses.ValueLoss.mean": { "value": 0.21454466572579217, "min": 0.11203519281744043, "max": 0.27842677954365225, "count": 20 }, "SnowballTarget.Losses.ValueLoss.sum": { "value": 0.8581786629031687, "min": 0.4481407712697617, "max": 1.349739720716196, "count": 20 }, "SnowballTarget.Policy.LearningRate.mean": { "value": 8.082097306000005e-06, "min": 8.082097306000005e-06, "max": 0.000291882002706, "count": 20 }, "SnowballTarget.Policy.LearningRate.sum": { "value": 3.232838922400002e-05, "min": 3.232838922400002e-05, "max": 0.00138516003828, "count": 20 }, "SnowballTarget.Policy.Epsilon.mean": { "value": 0.10269400000000001, "min": 0.10269400000000001, "max": 0.19729400000000002, "count": 20 }, "SnowballTarget.Policy.Epsilon.sum": { "value": 0.41077600000000003, "min": 0.41077600000000003, "max": 0.96172, "count": 20 }, "SnowballTarget.Policy.Beta.mean": { "value": 0.0001444306000000001, "min": 0.0001444306000000001, "max": 0.0048649706, "count": 20 }, "SnowballTarget.Policy.Beta.sum": { "value": 0.0005777224000000004, "min": 0.0005777224000000004, "max": 0.023089828, "count": 20 }, "SnowballTarget.Environment.EpisodeLength.mean": { "value": 199.0, "min": 199.0, "max": 199.0, "count": 20 }, "SnowballTarget.Environment.EpisodeLength.sum": { "value": 8756.0, "min": 8756.0, "max": 10945.0, "count": 20 }, "SnowballTarget.Environment.CumulativeReward.mean": { "value": 26.136363636363637, "min": 3.6136363636363638, "max": 26.136363636363637, "count": 20 }, "SnowballTarget.Environment.CumulativeReward.sum": { "value": 1150.0, "min": 159.0, "max": 1417.0, "count": 20 }, "SnowballTarget.Policy.ExtrinsicReward.mean": { "value": 26.136363636363637, "min": 3.6136363636363638, "max": 26.136363636363637, "count": 20 }, "SnowballTarget.Policy.ExtrinsicReward.sum": { "value": 1150.0, "min": 159.0, "max": 1417.0, "count": 20 }, "SnowballTarget.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 20 }, "SnowballTarget.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 20 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1758183568", "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", "command_line_arguments": "/home/yuming/miniconda3/envs/rl_ml_agent/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --force", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.8.0+cu128", "numpy_version": "1.23.5", "end_time_seconds": "1758183806" }, "total": 237.8115922230063, "count": 1, "self": 0.21907189901685342, "children": { "run_training.setup": { "total": 0.018007445993134752, "count": 1, "self": 0.018007445993134752 }, "TrainerController.start_learning": { "total": 237.5745128779963, "count": 1, "self": 0.2309214686974883, "children": { "TrainerController._reset_env": { "total": 2.2001233069750015, "count": 1, "self": 2.2001233069750015 }, "TrainerController.advance": { "total": 235.08754069532733, "count": 18192, "self": 0.22763188334647566, "children": { "env_step": { "total": 156.68144263268914, "count": 18192, "self": 104.22910317353671, "children": { "SubprocessEnvManager._take_step": { "total": 52.3040707501641, "count": 18192, "self": 0.6986452149285469, "children": { "TorchPolicy.evaluate": { "total": 51.605425535235554, "count": 18192, "self": 51.605425535235554 } } }, "workers": { "total": 0.14826870898832567, "count": 18192, "self": 0.0, "children": { "worker_root": { "total": 237.08271187069477, "count": 18192, "is_parallel": true, "self": 147.1819063592411, "children": { "run_training.setup": { "total": 0.0, "count": 0, "is_parallel": true, "self": 0.0, "children": { "steps_from_proto": { "total": 0.0012790839828085154, "count": 1, "is_parallel": true, "self": 0.0005449909658636898, "children": { "_process_rank_one_or_two_observation": { "total": 0.0007340930169448256, "count": 10, "is_parallel": true, "self": 0.0007340930169448256 } } }, "UnityEnvironment.step": { "total": 0.015110868000192568, "count": 1, "is_parallel": true, "self": 0.00020241702441126108, "children": { "UnityEnvironment._generate_step_input": { "total": 0.00015106599312275648, "count": 1, "is_parallel": true, "self": 0.00015106599312275648 }, "communicator.exchange": { "total": 0.01404297599219717, "count": 1, "is_parallel": true, "self": 0.01404297599219717 }, "steps_from_proto": { "total": 0.0007144089904613793, "count": 1, "is_parallel": true, "self": 0.00017903296975418925, "children": { "_process_rank_one_or_two_observation": { "total": 0.00053537602070719, "count": 10, "is_parallel": true, "self": 0.00053537602070719 } } } } } } }, "UnityEnvironment.step": { "total": 89.90080551145365, "count": 18191, "is_parallel": true, "self": 3.7129160354670603, "children": { "UnityEnvironment._generate_step_input": { "total": 2.1270105940348003, "count": 18191, "is_parallel": true, "self": 2.1270105940348003 }, "communicator.exchange": { "total": 72.09148825949524, "count": 18191, "is_parallel": true, "self": 72.09148825949524 }, "steps_from_proto": { "total": 11.969390622456558, "count": 18191, "is_parallel": true, "self": 2.5777382043306716, "children": { "_process_rank_one_or_two_observation": { "total": 9.391652418125886, "count": 181910, "is_parallel": true, "self": 9.391652418125886 } } } } } } } } } } }, "trainer_advance": { "total": 78.17846617929172, "count": 18192, "self": 0.2692121226573363, "children": { "process_trajectory": { "total": 17.299096313596237, "count": 18192, "self": 17.027359767613234, "children": { "RLTrainer._checkpoint": { "total": 0.27173654598300345, "count": 4, "self": 0.27173654598300345 } } }, "_update_policy": { "total": 60.610157743038144, "count": 90, "self": 22.548599490866764, "children": { "TorchPPOOptimizer.update": { "total": 38.06155825217138, "count": 4587, "self": 38.06155825217138 } } } } } } }, "trainer_threads": { "total": 6.709888111799955e-07, "count": 1, "self": 6.709888111799955e-07 }, "TrainerController._save_models": { "total": 0.05592673600767739, "count": 1, "self": 0.0004650610208045691, "children": { "RLTrainer._checkpoint": { "total": 0.05546167498687282, "count": 1, "self": 0.05546167498687282 } } } } } } }