{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.0250492095947266,
"min": 1.969856858253479,
"max": 3.295700788497925,
"count": 1000
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 41343.40234375,
"min": 20036.466796875,
"max": 139695.78125,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 46.666666666666664,
"min": 38.38095238095238,
"max": 999.0,
"count": 1000
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19600.0,
"min": 11172.0,
"max": 30276.0,
"count": 1000
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1467.962624332801,
"min": 1188.8156360974353,
"max": 1467.962624332801,
"count": 605
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 308272.1511098882,
"min": 2383.234956677933,
"max": 368481.32199275703,
"count": 605
},
"SoccerTwos.Step.mean": {
"value": 9999858.0,
"min": 9154.0,
"max": 9999858.0,
"count": 1000
},
"SoccerTwos.Step.sum": {
"value": 9999858.0,
"min": 9154.0,
"max": 9999858.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.04327896237373352,
"min": -0.11024277657270432,
"max": 0.16163739562034607,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 9.088582038879395,
"min": -23.040740966796875,
"max": 28.464092254638672,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.04228388890624046,
"min": -0.1127559170126915,
"max": 0.16538487374782562,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 8.879616737365723,
"min": -23.56598663330078,
"max": 27.964107513427734,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.10279428703444345,
"min": -0.7293440008163452,
"max": 0.43915294198428884,
"count": 1000
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 21.586800277233124,
"min": -40.9371999502182,
"max": 53.12920010089874,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.10279428703444345,
"min": -0.7293440008163452,
"max": 0.43915294198428884,
"count": 1000
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 21.586800277233124,
"min": -40.9371999502182,
"max": 53.12920010089874,
"count": 1000
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 1000
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.01720704310379612,
"min": 0.010555019648745657,
"max": 0.023545318642087903,
"count": 468
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.01720704310379612,
"min": 0.010555019648745657,
"max": 0.023545318642087903,
"count": 468
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.1077395275235176,
"min": 4.276794488925854e-09,
"max": 0.12304671903451284,
"count": 468
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.1077395275235176,
"min": 4.276794488925854e-09,
"max": 0.12304671903451284,
"count": 468
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.10922645876804987,
"min": 5.465641420935678e-09,
"max": 0.12631079827745756,
"count": 468
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.10922645876804987,
"min": 5.465641420935678e-09,
"max": 0.12631079827745756,
"count": 468
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 468
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 468
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 468
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 468
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 468
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 468
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1675511306",
"python_version": "3.9.7 (default, Oct 6 2021, 10:18:28) \n[Clang 13.0.0 (clang-1300.0.29.3)]",
"command_line_arguments": "/Users/agercas/.pyenv/versions/hf_drl_unit7/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/macos/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos10M --no-graphics --force",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1675556486"
},
"total": 45178.522654042,
"count": 1,
"self": 0.19941029199253535,
"children": {
"run_training.setup": {
"total": 0.026308125000000016,
"count": 1,
"self": 0.026308125000000016
},
"TrainerController.start_learning": {
"total": 45178.296935625,
"count": 1,
"self": 7.231531200646714,
"children": {
"TrainerController._reset_env": {
"total": 4.5023442920175,
"count": 50,
"self": 4.5023442920175
},
"TrainerController.advance": {
"total": 45166.482550465334,
"count": 663775,
"self": 6.626405849769071,
"children": {
"env_step": {
"total": 37762.5261885176,
"count": 663775,
"self": 36662.77503542597,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1094.6808622603596,
"count": 663775,
"self": 33.88777491465271,
"children": {
"TorchPolicy.evaluate": {
"total": 1060.793087345707,
"count": 1283248,
"self": 1060.793087345707
}
}
},
"workers": {
"total": 5.070290831270878,
"count": 663775,
"self": 0.0,
"children": {
"worker_root": {
"total": 45162.97832394736,
"count": 663775,
"is_parallel": true,
"self": 9542.034145479447,
"children": {
"steps_from_proto": {
"total": 0.0811352509911385,
"count": 100,
"is_parallel": true,
"self": 0.009099664929583362,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.07203558606155513,
"count": 400,
"is_parallel": true,
"self": 0.07203558606155513
}
}
},
"UnityEnvironment.step": {
"total": 35620.86304321692,
"count": 663775,
"is_parallel": true,
"self": 88.35763253782352,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 613.2643464051904,
"count": 663775,
"is_parallel": true,
"self": 613.2643464051904
},
"communicator.exchange": {
"total": 33688.85765295024,
"count": 663775,
"is_parallel": true,
"self": 33688.85765295024
},
"steps_from_proto": {
"total": 1230.383411323672,
"count": 1327550,
"is_parallel": true,
"self": 135.1490761978921,
"children": {
"_process_rank_one_or_two_observation": {
"total": 1095.2343351257798,
"count": 5310200,
"is_parallel": true,
"self": 1095.2343351257798
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 7397.329956097961,
"count": 663775,
"self": 58.71258690434843,
"children": {
"process_trajectory": {
"total": 1248.6041286876475,
"count": 663775,
"self": 1246.798400479638,
"children": {
"RLTrainer._checkpoint": {
"total": 1.805728208009441,
"count": 20,
"self": 1.805728208009441
}
}
},
"_update_policy": {
"total": 6090.013240505965,
"count": 468,
"self": 930.4642091497153,
"children": {
"TorchPOCAOptimizer.update": {
"total": 5159.5490313562495,
"count": 14040,
"self": 5159.5490313562495
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.169996827840805e-07,
"count": 1,
"self": 4.169996827840805e-07
},
"TrainerController._save_models": {
"total": 0.08050925000134157,
"count": 1,
"self": 0.0004910409988951869,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08001820900244638,
"count": 1,
"self": 0.08001820900244638
}
}
}
}
}
}
}