{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.8415113687515259, "min": 1.7726730108261108, "max": 3.295701265335083, "count": 543 }, "SoccerTwos.Policy.Entropy.sum": { "value": 39305.21875, "min": 19400.90625, "max": 115293.640625, "count": 543 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 67.47297297297297, "min": 45.44859813084112, "max": 999.0, "count": 543 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19972.0, "min": 9424.0, "max": 30784.0, "count": 543 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1622.5218249077457, "min": 1198.517206612908, "max": 1629.431628820468, "count": 526 }, "SoccerTwos.Self-play.ELO.sum": { "value": 240133.23008634636, "min": 2401.4985608882907, "max": 333091.46465859056, "count": 526 }, "SoccerTwos.Step.mean": { "value": 5429977.0, "min": 9408.0, "max": 5429977.0, "count": 543 }, "SoccerTwos.Step.sum": { "value": 5429977.0, "min": 9408.0, "max": 5429977.0, "count": 543 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.04260917752981186, "min": -0.09858232736587524, "max": 0.15926605463027954, "count": 543 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -6.306158065795898, "min": -19.236608505249023, "max": 24.1718692779541, "count": 543 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.04490833729505539, "min": -0.10374883562326431, "max": 0.1724524050951004, "count": 543 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -6.6464338302612305, "min": -19.919776916503906, "max": 25.397396087646484, "count": 543 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 543 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 543 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": -0.017578374695133517, "min": -0.5714285714285714, "max": 0.46935529638739193, "count": 543 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": -2.6015994548797607, "min": -48.8832004070282, "max": 51.39419996738434, "count": 543 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": -0.017578374695133517, "min": -0.5714285714285714, "max": 0.46935529638739193, "count": 543 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": -2.6015994548797607, "min": -48.8832004070282, "max": 51.39419996738434, "count": 543 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 543 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 543 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.016215235033693414, "min": 0.011285145840762805, "max": 0.024968095269287005, "count": 260 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.016215235033693414, "min": 0.011285145840762805, "max": 0.024968095269287005, "count": 260 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10830937698483467, "min": 6.865721167438702e-05, "max": 0.1166601357360681, "count": 260 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10830937698483467, "min": 6.865721167438702e-05, "max": 0.1166601357360681, "count": 260 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.11072705338398615, "min": 7.029581753765039e-05, "max": 0.11871446867783865, "count": 260 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.11072705338398615, "min": 7.029581753765039e-05, "max": 0.11871446867783865, "count": 260 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 260 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 260 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 260 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 260 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 260 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 260 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1746756751", "python_version": "3.10.12 | packaged by Anaconda, Inc. | (main, Jul 5 2023, 19:01:18) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "\\\\?\\C:\\Users\\soldesk\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics", "mlagents_version": "1.2.0.dev0", "mlagents_envs_version": "1.2.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.7.0+cpu", "numpy_version": "1.23.5", "end_time_seconds": "1746781548" }, "total": 24797.071732399985, "count": 1, "self": 2.927421599975787, "children": { "run_training.setup": { "total": 0.18820279999636114, "count": 1, "self": 0.18820279999636114 }, "TrainerController.start_learning": { "total": 24793.956108000013, "count": 1, "self": 17.457587655633688, "children": { "TrainerController._reset_env": { "total": 24.734964900184423, "count": 28, "self": 24.734964900184423 }, "TrainerController.advance": { "total": 24751.505926244194, "count": 369061, "self": 19.61032999062445, "children": { "env_step": { "total": 11162.883422868326, "count": 369061, "self": 8100.500175192719, "children": { "SubprocessEnvManager._take_step": { "total": 3052.475034287665, "count": 369061, "self": 109.73130642529577, "children": { "TorchPolicy.evaluate": { "total": 2942.7437278623693, "count": 687006, "self": 2942.7437278623693 } } }, "workers": { "total": 9.908213387941942, "count": 369061, "self": 0.0, "children": { "worker_root": { "total": 24739.80137448234, "count": 369061, "is_parallel": true, "self": 18577.941311041242, "children": { "steps_from_proto": { "total": 0.06123480061069131, "count": 56, "is_parallel": true, "self": 0.01283439970575273, "children": { "_process_rank_one_or_two_observation": { "total": 0.04840040090493858, "count": 224, "is_parallel": true, "self": 0.04840040090493858 } } }, "UnityEnvironment.step": { "total": 6161.798828640487, "count": 369061, "is_parallel": true, "self": 249.52779166109394, "children": { "UnityEnvironment._generate_step_input": { "total": 185.03382068499923, "count": 369061, "is_parallel": true, "self": 185.03382068499923 }, "communicator.exchange": { "total": 4947.23758500407, "count": 369061, "is_parallel": true, "self": 4947.23758500407 }, "steps_from_proto": { "total": 779.999631290324, "count": 738122, "is_parallel": true, "self": 162.43776214006357, "children": { "_process_rank_one_or_two_observation": { "total": 617.5618691502605, "count": 2952488, "is_parallel": true, "self": 617.5618691502605 } } } } } } } } } } }, "trainer_advance": { "total": 13569.012173385243, "count": 369061, "self": 101.24363214732148, "children": { "process_trajectory": { "total": 2964.4469012388727, "count": 369061, "self": 2960.806616738555, "children": { "RLTrainer._checkpoint": { "total": 3.640284500317648, "count": 10, "self": 3.640284500317648 } } }, "_update_policy": { "total": 10503.321639999049, "count": 261, "self": 1687.3556401951937, "children": { "TorchPOCAOptimizer.update": { "total": 8815.965999803855, "count": 7810, "self": 8815.965999803855 } } } } } } }, "trainer_threads": { "total": 1.300009898841381e-06, "count": 1, "self": 1.300009898841381e-06 }, "TrainerController._save_models": { "total": 0.2576278999913484, "count": 1, "self": 0.019494600011967123, "children": { "RLTrainer._checkpoint": { "total": 0.23813329997938126, "count": 1, "self": 0.23813329997938126 } } } } } } }