{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.9697359800338745, "min": 1.966881513595581, "max": 3.2957582473754883, "count": 500 }, "SoccerTwos.Policy.Entropy.sum": { "value": 36558.30078125, "min": 18374.30859375, "max": 150261.234375, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 60.9375, "min": 45.263636363636365, "max": 999.0, "count": 500 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19500.0, "min": 16004.0, "max": 25280.0, "count": 500 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1575.0734945998786, "min": 1199.3719246244254, "max": 1586.6469330201007, "count": 485 }, "SoccerTwos.Self-play.ELO.sum": { "value": 252011.75913598057, "min": 2401.550792445348, "max": 326194.5307976905, "count": 485 }, "SoccerTwos.Step.mean": { "value": 4999962.0, "min": 9656.0, "max": 4999962.0, "count": 500 }, "SoccerTwos.Step.sum": { "value": 4999962.0, "min": 9656.0, "max": 4999962.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.0028049107640981674, "min": -0.11986244469881058, "max": 0.21215492486953735, "count": 500 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 0.4487857222557068, "min": -18.93826675415039, "max": 36.06633758544922, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.0077615417540073395, "min": -0.12054023146629333, "max": 0.21119002997875214, "count": 500 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 1.2418466806411743, "min": -19.04535675048828, "max": 35.902305603027344, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.17381250262260436, "min": -0.5735230812659631, "max": 0.4473241423738414, "count": 500 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 27.8100004196167, "min": -57.73399996757507, "max": 58.933200001716614, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.17381250262260436, "min": -0.5735230812659631, "max": 0.4473241423738414, "count": 500 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 27.8100004196167, "min": -57.73399996757507, "max": 58.933200001716614, "count": 500 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 500 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.015994472340874685, "min": 0.011336214087593059, "max": 0.023711995521443895, "count": 240 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.015994472340874685, "min": 0.011336214087593059, "max": 0.023711995521443895, "count": 240 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.10218305985132853, "min": 5.2809119733865376e-05, "max": 0.11367578506469726, "count": 240 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.10218305985132853, "min": 5.2809119733865376e-05, "max": 0.11367578506469726, "count": 240 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.10294195239742597, "min": 5.217553346786493e-05, "max": 0.11512249285976092, "count": 240 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.10294195239742597, "min": 5.217553346786493e-05, "max": 0.11512249285976092, "count": 240 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 240 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 240 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 240 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 240 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 240 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 240 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1711455543", "python_version": "3.10.11 (v3.10.11:7d4cc5aa85, Apr 4 2023, 19:05:19) [Clang 13.0.0 (clang-1300.0.29.30)]", "command_line_arguments": "/Users/wangdawei/Projects/AI/huggingface/MultiAgentRL/.venv/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --force", "mlagents_version": "1.1.0.dev0", "mlagents_envs_version": "1.1.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "2.4.0.dev20240326", "numpy_version": "1.23.5", "end_time_seconds": "1711507762" }, "total": 14570.518130790995, "count": 1, "self": 0.17878216699318727, "children": { "run_training.setup": { "total": 0.011659041003440507, "count": 1, "self": 0.011659041003440507 }, "TrainerController.start_learning": { "total": 14570.327689582999, "count": 1, "self": 3.190981211075268, "children": { "TrainerController._reset_env": { "total": 2.392167252008221, "count": 25, "self": 2.392167252008221 }, "TrainerController.advance": { "total": 14564.665453703914, "count": 338852, "self": 2.828530138140195, "children": { "env_step": { "total": 11751.832948865609, "count": 338852, "self": 11278.344737993022, "children": { "SubprocessEnvManager._take_step": { "total": 471.30086098513857, "count": 338852, "self": 14.125520602196048, "children": { "TorchPolicy.evaluate": { "total": 457.1753403829425, "count": 633008, "self": 457.1753403829425 } } }, "workers": { "total": 2.187349887448363, "count": 338852, "self": 0.0, "children": { "worker_root": { "total": 14564.51819917561, "count": 338852, "is_parallel": true, "self": 3734.8232051270097, "children": { "steps_from_proto": { "total": 0.03025671300565591, "count": 50, "is_parallel": true, "self": 0.0039582969911862165, "children": { "_process_rank_one_or_two_observation": { "total": 0.026298416014469694, "count": 200, "is_parallel": true, "self": 0.026298416014469694 } } }, "UnityEnvironment.step": { "total": 10829.664737335595, "count": 338852, "is_parallel": true, "self": 30.503981621885032, "children": { "UnityEnvironment._generate_step_input": { "total": 197.2552867792765, "count": 338852, "is_parallel": true, "self": 197.2552867792765 }, "communicator.exchange": { "total": 10214.763104317295, "count": 338852, "is_parallel": true, "self": 10214.763104317295 }, "steps_from_proto": { "total": 387.1423646171388, "count": 677704, "is_parallel": true, "self": 47.881066484325856, "children": { "_process_rank_one_or_two_observation": { "total": 339.26129813281295, "count": 2710816, "is_parallel": true, "self": 339.26129813281295 } } } } } } } } } } }, "trainer_advance": { "total": 2810.0039747001647, "count": 338852, "self": 25.11066259314248, "children": { "process_trajectory": { "total": 640.1634038139673, "count": 338852, "self": 639.3627999789824, "children": { "RLTrainer._checkpoint": { "total": 0.8006038349849405, "count": 10, "self": 0.8006038349849405 } } }, "_update_policy": { "total": 2144.729908293055, "count": 240, "self": 248.46439873804775, "children": { "TorchPOCAOptimizer.update": { "total": 1896.2655095550072, "count": 7200, "self": 1896.2655095550072 } } } } } } }, "trainer_threads": { "total": 3.749955794773996e-07, "count": 1, "self": 3.749955794773996e-07 }, "TrainerController._save_models": { "total": 0.07908704100555042, "count": 1, "self": 0.0004973740069544874, "children": { "RLTrainer._checkpoint": { "total": 0.07858966699859593, "count": 1, "self": 0.07858966699859593 } } } } } } }