First Push

69e80e0 verified 9 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 1.438185214996338,
	"min": 1.2871428728103638,
	"max": 3.295762777328491,
	"count": 5000
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 29546.078125,
	"min": 18158.86328125,
	"max": 128507.140625,
	"count": 5000
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 80.08333333333333,
	"min": 42.663716814159294,
	"max": 999.0,
	"count": 5000
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19220.0,
	"min": 5000.0,
	"max": 29028.0,
	"count": 5000
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1725.4868357549199,
	"min": 1193.015162420967,
	"max": 1786.8978130593764,
	"count": 4983
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 207058.42029059038,
	"min": 2386.030324841934,
	"max": 369536.6967138476,
	"count": 4983
	},
	"SoccerTwos.Step.mean": {
	"value": 49999983.0,
	"min": 9530.0,
	"max": 49999983.0,
	"count": 5000
	},
	"SoccerTwos.Step.sum": {
	"value": 49999983.0,
	"min": 9530.0,
	"max": 49999983.0,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": -0.06711877137422562,
	"min": -0.13952656090259552,
	"max": 0.18732668459415436,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": -8.121371269226074,
	"min": -26.649574279785156,
	"max": 26.909523010253906,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": -0.06701740622520447,
	"min": -0.1348048895597458,
	"max": 0.19345541298389435,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": -8.109106063842773,
	"min": -25.696334838867188,
	"max": 28.63368797302246,
	"count": 5000
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 5000
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": -0.022000001481741912,
	"min": -0.5714285714285714,
	"max": 0.4471384584903717,
	"count": 5000
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": -2.6620001792907715,
	"min": -77.37279975414276,
	"max": 56.36819976568222,
	"count": 5000
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": -0.022000001481741912,
	"min": -0.5714285714285714,
	"max": 0.4471384584903717,
	"count": 5000
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": -2.6620001792907715,
	"min": -77.37279975414276,
	"max": 56.36819976568222,
	"count": 5000
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 5000
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 5000
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.018539122284467642,
	"min": 0.010676135318014228,
	"max": 0.026691932663864767,
	"count": 2422
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.018539122284467642,
	"min": 0.010676135318014228,
	"max": 0.026691932663864767,
	"count": 2422
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.07924022773901622,
	"min": 7.781588449991734e-06,
	"max": 0.11845265949765842,
	"count": 2422
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.07924022773901622,
	"min": 7.781588449991734e-06,
	"max": 0.11845265949765842,
	"count": 2422
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.08009959459304809,
	"min": 7.447628892502204e-06,
	"max": 0.12023574585715929,
	"count": 2422
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.08009959459304809,
	"min": 7.447628892502204e-06,
	"max": 0.12023574585715929,
	"count": 2422
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 2422
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 2422
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.2,
	"max": 0.20000000000000007,
	"count": 2422
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.2,
	"max": 0.20000000000000007,
	"count": 2422
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005,
	"max": 0.005000000000000001,
	"count": 2422
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005,
	"max": 0.005000000000000001,
	"count": 2422
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1754278794",
	"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
	"command_line_arguments": "/home/bolin/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --force",
	"mlagents_version": "1.2.0.dev0",
	"mlagents_envs_version": "1.2.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.7.1+cu126",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1754337377"
	},
	"total": 58583.416624648,
	"count": 1,
	"self": 0.1668565660074819,
	"children": {
	"run_training.setup": {
	"total": 0.015344877000188717,
	"count": 1,
	"self": 0.015344877000188717
	},
	"TrainerController.start_learning": {
	"total": 58583.234423204995,
	"count": 1,
	"self": 28.006689679357805,
	"children": {
	"TrainerController._reset_env": {
	"total": 3.7130218009770033,
	"count": 250,
	"self": 3.7130218009770033
	},
	"TrainerController.advance": {
	"total": 58551.44855011267,
	"count": 3427750,
	"self": 26.388956220180262,
	"children": {
	"env_step": {
	"total": 23389.66532693994,
	"count": 3427750,
	"self": 19475.559716328786,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 3896.89595637029,
	"count": 3427750,
	"self": 182.23068291639538,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 3714.6652734538948,
	"count": 6284314,
	"self": 3714.6652734538948
	}
	}
	},
	"workers": {
	"total": 17.20965424086353,
	"count": 3427750,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 58544.79750145348,
	"count": 3427750,
	"is_parallel": true,
	"self": 42551.46606135835,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0014544330001626804,
	"count": 2,
	"is_parallel": true,
	"self": 0.00036373899956743116,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0010906940005952492,
	"count": 8,
	"is_parallel": true,
	"self": 0.0010906940005952492
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.012421765999988565,
	"count": 1,
	"is_parallel": true,
	"self": 0.0003036180000890454,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0002772789998743974,
	"count": 1,
	"is_parallel": true,
	"self": 0.0002772789998743974
	},
	"communicator.exchange": {
	"total": 0.01087160400038556,
	"count": 1,
	"is_parallel": true,
	"self": 0.01087160400038556
	},
	"steps_from_proto": {
	"total": 0.000969264999639563,
	"count": 2,
	"is_parallel": true,
	"self": 0.00022828999999546795,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0007409749996440951,
	"count": 8,
	"is_parallel": true,
	"self": 0.0007409749996440951
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 15993.118884885997,
	"count": 3427749,
	"is_parallel": true,
	"self": 900.4193676668183,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 612.4480266295209,
	"count": 3427749,
	"is_parallel": true,
	"self": 612.4480266295209
	},
	"communicator.exchange": {
	"total": 11950.058678117999,
	"count": 3427749,
	"is_parallel": true,
	"self": 11950.058678117999
	},
	"steps_from_proto": {
	"total": 2530.1928124716583,
	"count": 6855498,
	"is_parallel": true,
	"self": 435.93581838643513,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 2094.256994085223,
	"count": 27421992,
	"is_parallel": true,
	"self": 2094.256994085223
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.21255520914064618,
	"count": 498,
	"is_parallel": true,
	"self": 0.03900427415192098,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.1735509349887252,
	"count": 1992,
	"is_parallel": true,
	"self": 0.1735509349887252
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 35135.39426695255,
	"count": 3427750,
	"self": 266.1699697615768,
	"children": {
	"process_trajectory": {
	"total": 5083.07728814891,
	"count": 3427750,
	"self": 5076.223328915879,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 6.853959233030309,
	"count": 100,
	"self": 6.853959233030309
	}
	}
	},
	"_update_policy": {
	"total": 29786.147009042066,
	"count": 2422,
	"self": 2878.9186776652605,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 26907.228331376806,
	"count": 72672,
	"self": 26907.228331376806
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 3.609966370277107e-07,
	"count": 1,
	"self": 3.609966370277107e-07
	},
	"TrainerController._save_models": {
	"total": 0.06616125099390047,
	"count": 1,
	"self": 0.0009089379891520366,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.06525231300474843,
	"count": 1,
	"self": 0.06525231300474843
	}
	}
	}
	}
	}
	}
	}