{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.0250492095947266,
"min": 1.969856858253479,
"max": 3.295700788497925,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 41343.40234375,
"min": 20036.466796875,
"max": 139695.78125,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 46.666666666666664,
"min": 38.38095238095238,
"max": 999.0,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19600.0,
"min": 11172.0,
"max": 30276.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1467.962624332801,
"min": 1188.8156360974353,
"max": 1467.962624332801,
"count": 605
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 308272.1511098882,
"min": 2383.234956677933,
"max": 368481.32199275703,
"count": 605
},
"SoccerTwos.Step.mean": {
"value": 9999858.0,
"min": 9154.0,
"max": 9999858.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 9999858.0,
"min": 9154.0,
"max": 9999858.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.04327896237373352,
"min": -0.11024277657270432,
"max": 0.16163739562034607,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 9.088582038879395,
"min": -23.040740966796875,
"max": 28.464092254638672,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.04228388890624046,
"min": -0.1127559170126915,
"max": 0.16538487374782562,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 8.879616737365723,
"min": -23.56598663330078,
"max": 27.964107513427734,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.10279428703444345,
"min": -0.7293440008163452,
"max": 0.43915294198428884,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 21.586800277233124,
"min": -40.9371999502182,
"max": 53.12920010089874,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.10279428703444345,
"min": -0.7293440008163452,
"max": 0.43915294198428884,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 21.586800277233124,
"min": -40.9371999502182,
"max": 53.12920010089874,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01720704310379612,
"min": 0.010555019648745657,
"max": 0.023545318642087903,
"count": 468
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01720704310379612,
"min": 0.010555019648745657,
"max": 0.023545318642087903,
"count": 468
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1077395275235176,
"min": 4.276794488925854e-09,
"max": 0.12304671903451284,
"count": 468
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1077395275235176,
"min": 4.276794488925854e-09,
"max": 0.12304671903451284,
"count": 468
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10922645876804987,
"min": 5.465641420935678e-09,
"max": 0.12631079827745756,
"count": 468
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10922645876804987,
"min": 5.465641420935678e-09,
"max": 0.12631079827745756,
"count": 468
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 468
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 468
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 468
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 468
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 468
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 468
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675511306",
"python_version": "3.9.7 (default, Oct 6 2021, 10:18:28) \n[Clang 13.0.0 (clang-1300.0.29.3)]",
"command_line_arguments": "/Users/agercas/.pyenv/versions/hf_drl_unit7/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/macos/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos10M --no-graphics --force",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1675556486"
},
"total": 45178.522654042,
"count": 1,
"self": 0.19941029199253535,
"children": {
"run_training.setup": {
"total": 0.026308125000000016,
"count": 1,
"self": 0.026308125000000016
},
"TrainerController.start_learning": {
"total": 45178.296935625,
"count": 1,
"self": 7.231531200646714,
"children": {
"TrainerController._reset_env": {
"total": 4.5023442920175,
"count": 50,
"self": 4.5023442920175
},
"TrainerController.advance": {
"total": 45166.482550465334,
"count": 663775,
"self": 6.626405849769071,
"children": {
"env_step": {
"total": 37762.5261885176,
"count": 663775,
"self": 36662.77503542597,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1094.6808622603596,
"count": 663775,
"self": 33.88777491465271,
"children": {
"TorchPolicy.evaluate": {
"total": 1060.793087345707,
"count": 1283248,
"self": 1060.793087345707
}
}
},
"workers": {
"total": 5.070290831270878,
"count": 663775,
"self": 0.0,
"children": {
"worker_root": {
"total": 45162.97832394736,
"count": 663775,
"is_parallel": true,
"self": 9542.034145479447,
"children": {
"steps_from_proto": {
"total": 0.0811352509911385,
"count": 100,
"is_parallel": true,
"self": 0.009099664929583362,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.07203558606155513,
"count": 400,
"is_parallel": true,
"self": 0.07203558606155513
}
}
},
"UnityEnvironment.step": {
"total": 35620.86304321692,
"count": 663775,
"is_parallel": true,
"self": 88.35763253782352,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 613.2643464051904,
"count": 663775,
"is_parallel": true,
"self": 613.2643464051904
},
"communicator.exchange": {
"total": 33688.85765295024,
"count": 663775,
"is_parallel": true,
"self": 33688.85765295024
},
"steps_from_proto": {
"total": 1230.383411323672,
"count": 1327550,
"is_parallel": true,
"self": 135.1490761978921,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1095.2343351257798,
"count": 5310200,
"is_parallel": true,
"self": 1095.2343351257798
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 7397.329956097961,
"count": 663775,
"self": 58.71258690434843,
"children": {
"process_trajectory": {
"total": 1248.6041286876475,
"count": 663775,
"self": 1246.798400479638,
"children": {
"RLTrainer._checkpoint": {
"total": 1.805728208009441,
"count": 20,
"self": 1.805728208009441
}
}
},
"_update_policy": {
"total": 6090.013240505965,
"count": 468,
"self": 930.4642091497153,
"children": {
"TorchPOCAOptimizer.update": {
"total": 5159.5490313562495,
"count": 14040,
"self": 5159.5490313562495
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.169996827840805e-07,
"count": 1,
"self": 4.169996827840805e-07
},
"TrainerController._save_models": {
"total": 0.08050925000134157,
"count": 1,
"self": 0.0004910409988951869,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08001820900244638,
"count": 1,
"self": 0.08001820900244638
}
}
}
}
}
}
}