{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 1.9146578311920166,
"min": 1.8816959857940674,
"max": 2.0939531326293945,
"count": 157
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 38293.15625,
"min": 3678.82763671875,
"max": 44219.87890625,
"count": 157
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 60.592592592592595,
"min": 31.166666666666668,
"max": 79.50819672131148,
"count": 157
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19632.0,
"min": 748.0,
"max": 20580.0,
"count": 157
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1657.1777093694855,
"min": 1596.0209309391514,
"max": 1677.4130578972968,
"count": 157
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 268462.78891785664,
"min": 19382.28819872509,
"max": 352889.09377889097,
"count": 157
},
"SoccerTwos.Step.mean": {
"value": 6189770.0,
"min": 4629984.0,
"max": 6189770.0,
"count": 157
},
"SoccerTwos.Step.sum": {
"value": 6189770.0,
"min": 4629984.0,
"max": 6189770.0,
"count": 157
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.06133899837732315,
"min": -0.09600508213043213,
"max": 0.18259942531585693,
"count": 157
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -9.99825668334961,
"min": -18.144960403442383,
"max": 19.597110748291016,
"count": 157
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.05908508971333504,
"min": -0.0944647341966629,
"max": 0.17428338527679443,
"count": 157
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -9.63086986541748,
"min": -17.85383415222168,
"max": 20.22264862060547,
"count": 157
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 157
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 157
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": -0.06833864867321553,
"min": -0.2674341448923437,
"max": 0.19632648712879902,
"count": 157
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": -11.13919973373413,
"min": -43.85919976234436,
"max": 38.9307998418808,
"count": 157
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": -0.06833864867321553,
"min": -0.2674341448923437,
"max": 0.19632648712879902,
"count": 157
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": -11.13919973373413,
"min": -43.85919976234436,
"max": 38.9307998418808,
"count": 157
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 157
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 157
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.015837372342745463,
"min": 0.011977727416281898,
"max": 0.02297194521718969,
"count": 75
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.015837372342745463,
"min": 0.011977727416281898,
"max": 0.02297194521718969,
"count": 75
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.09520911599198977,
"min": 0.0909675675133864,
"max": 0.11838576346635818,
"count": 75
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.09520911599198977,
"min": 0.0909675675133864,
"max": 0.11838576346635818,
"count": 75
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.09653009250760078,
"min": 0.0923578292131424,
"max": 0.12042878394325575,
"count": 75
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.09653009250760078,
"min": 0.0923578292131424,
"max": 0.12042878394325575,
"count": 75
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 75
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 75
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 75
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 75
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 75
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 75
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1690776249",
"python_version": "3.9.17 (main, Jul 5 2023, 20:47:11) [MSC v.1916 64 bit (AMD64)]",
"command_line_arguments": "\\\\?\\C:\\Users\\praty\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.0.1+cpu",
"numpy_version": "1.21.2",
"end_time_seconds": "1690783862"
},
"total": 7613.2275895,
"count": 1,
"self": 0.9505321000006006,
"children": {
"run_training.setup": {
"total": 0.15083029999999997,
"count": 1,
"self": 0.15083029999999997
},
"TrainerController.start_learning": {
"total": 7612.1262271,
"count": 1,
"self": 4.238344899974436,
"children": {
"TrainerController._reset_env": {
"total": 7.682890000000809,
"count": 9,
"self": 7.682890000000809
},
"TrainerController.advance": {
"total": 7599.935367400025,
"count": 108959,
"self": 4.202570799974637,
"children": {
"env_step": {
"total": 3077.6915271000057,
"count": 108959,
"self": 2458.9751897999404,
"children": {
"SubprocessEnvManager._take_step": {
"total": 616.0361776000498,
"count": 108959,
"self": 22.296344799981625,
"children": {
"TorchPolicy.evaluate": {
"total": 593.7398328000681,
"count": 196516,
"self": 593.7398328000681
}
}
},
"workers": {
"total": 2.6801597000153734,
"count": 108959,
"self": 0.0,
"children": {
"worker_root": {
"total": 7598.475134400012,
"count": 108959,
"is_parallel": true,
"self": 5599.011874100275,
"children": {
"steps_from_proto": {
"total": 0.03077510000040018,
"count": 18,
"is_parallel": true,
"self": 0.005130900002915162,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.025644199997485018,
"count": 72,
"is_parallel": true,
"self": 0.025644199997485018
}
}
},
"UnityEnvironment.step": {
"total": 1999.4324851997364,
"count": 108959,
"is_parallel": true,
"self": 99.331972099925,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 71.20992889990168,
"count": 108959,
"is_parallel": true,
"self": 71.20992889990168
},
"communicator.exchange": {
"total": 1504.614644099912,
"count": 108959,
"is_parallel": true,
"self": 1504.614644099912
},
"steps_from_proto": {
"total": 324.27594009999746,
"count": 217918,
"is_parallel": true,
"self": 64.65784739937868,
"children": {
"_process_rank_one_or_two_observation": {
"total": 259.6180927006188,
"count": 871672,
"is_parallel": true,
"self": 259.6180927006188
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 4518.041269500045,
"count": 108958,
"self": 28.42740490005417,
"children": {
"process_trajectory": {
"total": 693.1102233999923,
"count": 108958,
"self": 692.305933399992,
"children": {
"RLTrainer._checkpoint": {
"total": 0.8042900000002646,
"count": 3,
"self": 0.8042900000002646
}
}
},
"_update_policy": {
"total": 3796.5036411999986,
"count": 76,
"self": 325.9799644000127,
"children": {
"TorchPOCAOptimizer.update": {
"total": 3470.523676799986,
"count": 2280,
"self": 3470.523676799986
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.99999976757681e-06,
"count": 1,
"self": 1.99999976757681e-06
},
"TrainerController._save_models": {
"total": 0.26962279999952443,
"count": 1,
"self": 0.015377699999589822,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2542450999999346,
"count": 1,
"self": 0.2542450999999346
}
}
}
}
}
}
}