First Push

a541bdb verified 9 months ago

20.2 kB

	{
	"name": "root",
	"gauges": {
	"SoccerTwos.Policy.Entropy.mean": {
	"value": 1.5260498523712158,
	"min": 1.47836434841156,
	"max": 1.5717905759811401,
	"count": 230
	},
	"SoccerTwos.Policy.Entropy.sum": {
	"value": 30472.1640625,
	"min": 23978.40234375,
	"max": 38699.16796875,
	"count": 230
	},
	"SoccerTwos.Environment.EpisodeLength.mean": {
	"value": 70.20289855072464,
	"min": 58.97560975609756,
	"max": 111.08695652173913,
	"count": 230
	},
	"SoccerTwos.Environment.EpisodeLength.sum": {
	"value": 19376.0,
	"min": 14248.0,
	"max": 21124.0,
	"count": 230
	},
	"SoccerTwos.Self-play.ELO.mean": {
	"value": 1712.0493582103122,
	"min": 1700.9733828056937,
	"max": 1764.6453188312037,
	"count": 230
	},
	"SoccerTwos.Self-play.ELO.sum": {
	"value": 236262.81143302307,
	"min": 135199.9946404582,
	"max": 293076.2351296764,
	"count": 230
	},
	"SoccerTwos.Step.mean": {
	"value": 29999974.0,
	"min": 27709981.0,
	"max": 29999974.0,
	"count": 230
	},
	"SoccerTwos.Step.sum": {
	"value": 29999974.0,
	"min": 27709981.0,
	"max": 29999974.0,
	"count": 230
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
	"value": -0.02527669444680214,
	"min": -0.12162230163812637,
	"max": 0.04789558798074722,
	"count": 230
	},
	"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
	"value": -3.513460636138916,
	"min": -18.121723175048828,
	"max": 7.892575263977051,
	"count": 230
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
	"value": -0.02623376064002514,
	"min": -0.12381523102521896,
	"max": 0.050768427550792694,
	"count": 230
	},
	"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
	"value": -3.6464927196502686,
	"min": -18.448469161987305,
	"max": 8.272490501403809,
	"count": 230
	},
	"SoccerTwos.Environment.CumulativeReward.mean": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 230
	},
	"SoccerTwos.Environment.CumulativeReward.sum": {
	"value": 0.0,
	"min": 0.0,
	"max": 0.0,
	"count": 230
	},
	"SoccerTwos.Policy.ExtrinsicReward.mean": {
	"value": 0.09223597041136927,
	"min": -0.3490041961203088,
	"max": 0.36174961674304407,
	"count": 230
	},
	"SoccerTwos.Policy.ExtrinsicReward.sum": {
	"value": 12.820799887180328,
	"min": -49.90760004520416,
	"max": 47.389199793338776,
	"count": 230
	},
	"SoccerTwos.Environment.GroupCumulativeReward.mean": {
	"value": 0.09223597041136927,
	"min": -0.3490041961203088,
	"max": 0.36174961674304407,
	"count": 230
	},
	"SoccerTwos.Environment.GroupCumulativeReward.sum": {
	"value": 12.820799887180328,
	"min": -49.90760004520416,
	"max": 47.389199793338776,
	"count": 230
	},
	"SoccerTwos.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 230
	},
	"SoccerTwos.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 230
	},
	"SoccerTwos.Losses.PolicyLoss.mean": {
	"value": 0.01872896176064387,
	"min": 0.012019599394019071,
	"max": 0.024193876256079723,
	"count": 111
	},
	"SoccerTwos.Losses.PolicyLoss.sum": {
	"value": 0.01872896176064387,
	"min": 0.012019599394019071,
	"max": 0.024193876256079723,
	"count": 111
	},
	"SoccerTwos.Losses.ValueLoss.mean": {
	"value": 0.09518186748027802,
	"min": 0.0706393318871657,
	"max": 0.09518186748027802,
	"count": 111
	},
	"SoccerTwos.Losses.ValueLoss.sum": {
	"value": 0.09518186748027802,
	"min": 0.0706393318871657,
	"max": 0.09518186748027802,
	"count": 111
	},
	"SoccerTwos.Losses.BaselineLoss.mean": {
	"value": 0.09562629113594691,
	"min": 0.07152016038695971,
	"max": 0.09562629113594691,
	"count": 111
	},
	"SoccerTwos.Losses.BaselineLoss.sum": {
	"value": 0.09562629113594691,
	"min": 0.07152016038695971,
	"max": 0.09562629113594691,
	"count": 111
	},
	"SoccerTwos.Policy.LearningRate.mean": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 111
	},
	"SoccerTwos.Policy.LearningRate.sum": {
	"value": 0.0003,
	"min": 0.0003,
	"max": 0.0003,
	"count": 111
	},
	"SoccerTwos.Policy.Epsilon.mean": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 111
	},
	"SoccerTwos.Policy.Epsilon.sum": {
	"value": 0.20000000000000007,
	"min": 0.20000000000000007,
	"max": 0.20000000000000007,
	"count": 111
	},
	"SoccerTwos.Policy.Beta.mean": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 111
	},
	"SoccerTwos.Policy.Beta.sum": {
	"value": 0.005000000000000001,
	"min": 0.005000000000000001,
	"max": 0.005000000000000001,
	"count": 111
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1747997676",
	"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
	"command_line_arguments": "/home/misbah/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics --resume",
	"mlagents_version": "1.2.0.dev0",
	"mlagents_envs_version": "1.2.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.7.0+cu126",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1748003870"
	},
	"total": 6193.243015332002,
	"count": 1,
	"self": 0.2190838940005051,
	"children": {
	"run_training.setup": {
	"total": 0.008608447009464726,
	"count": 1,
	"self": 0.008608447009464726
	},
	"TrainerController.start_learning": {
	"total": 6193.015322990992,
	"count": 1,
	"self": 2.8038882425171323,
	"children": {
	"TrainerController._reset_env": {
	"total": 1.4619750839920016,
	"count": 13,
	"self": 1.4619750839920016
	},
	"TrainerController.advance": {
	"total": 6188.660720257481,
	"count": 155986,
	"self": 2.6640657623356674,
	"children": {
	"env_step": {
	"total": 1907.5624348415295,
	"count": 155986,
	"self": 1611.387107489776,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 294.368761390273,
	"count": 155986,
	"self": 14.094670786944334,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 280.27409060332866,
	"count": 288798,
	"self": 280.27409060332866
	}
	}
	},
	"workers": {
	"total": 1.8065659614803735,
	"count": 155986,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 6188.076122690254,
	"count": 155986,
	"is_parallel": true,
	"self": 4824.377654081254,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0013928839907748625,
	"count": 2,
	"is_parallel": true,
	"self": 0.000349724039551802,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0010431599512230605,
	"count": 8,
	"is_parallel": true,
	"self": 0.0010431599512230605
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.014681180997285992,
	"count": 1,
	"is_parallel": true,
	"self": 0.00035585599835030735,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0002818450011545792,
	"count": 1,
	"is_parallel": true,
	"self": 0.0002818450011545792
	},
	"communicator.exchange": {
	"total": 0.013071444001980126,
	"count": 1,
	"is_parallel": true,
	"self": 0.013071444001980126
	},
	"steps_from_proto": {
	"total": 0.0009720359958009794,
	"count": 2,
	"is_parallel": true,
	"self": 0.0001990060118259862,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0007730299839749932,
	"count": 8,
	"is_parallel": true,
	"self": 0.0007730299839749932
	}
	}
	}
	}
	}
	}
	},
	"steps_from_proto": {
	"total": 0.014256768015911803,
	"count": 24,
	"is_parallel": true,
	"self": 0.002820325084030628,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.011436442931881174,
	"count": 96,
	"is_parallel": true,
	"self": 0.011436442931881174
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1363.684211840984,
	"count": 155985,
	"is_parallel": true,
	"self": 67.68927301358781,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 75.72199507434561,
	"count": 155985,
	"is_parallel": true,
	"self": 75.72199507434561
	},
	"communicator.exchange": {
	"total": 1002.7482204776024,
	"count": 155985,
	"is_parallel": true,
	"self": 1002.7482204776024
	},
	"steps_from_proto": {
	"total": 217.5247232754482,
	"count": 311970,
	"is_parallel": true,
	"self": 46.355050702157314,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 171.16967257329088,
	"count": 1247880,
	"is_parallel": true,
	"self": 171.16967257329088
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 4278.434219653616,
	"count": 155986,
	"self": 18.51109605266538,
	"children": {
	"process_trajectory": {
	"total": 281.2572263569018,
	"count": 155986,
	"self": 280.7852156188892,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.47201073801261373,
	"count": 5,
	"self": 0.47201073801261373
	}
	}
	},
	"_update_policy": {
	"total": 3978.665897244049,
	"count": 111,
	"self": 154.19840713523445,
	"children": {
	"TorchPOCAOptimizer.update": {
	"total": 3824.4674901088147,
	"count": 3330,
	"self": 3824.4674901088147
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 5.970068741589785e-07,
	"count": 1,
	"self": 5.970068741589785e-07
	},
	"TrainerController._save_models": {
	"total": 0.08873880999453831,
	"count": 1,
	"self": 0.0013925339881097898,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.08734627600642852,
	"count": 1,
	"self": 0.08734627600642852
	}
	}
	}
	}
	}
	}
	}