ppo-pyramid / run_logs / timers.json
Yelin Z
RL course default config, 1.5M training steps
086f0bc
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.22584600746631622,
"min": 0.2226453721523285,
"max": 1.4426076412200928,
"count": 50
},
"Pyramids.Policy.Entropy.sum": {
"value": 6771.7666015625,
"min": 6654.4248046875,
"max": 43762.9453125,
"count": 50
},
"Pyramids.Step.mean": {
"value": 1499939.0,
"min": 29922.0,
"max": 1499939.0,
"count": 50
},
"Pyramids.Step.sum": {
"value": 1499939.0,
"min": 29922.0,
"max": 1499939.0,
"count": 50
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.6199408173561096,
"min": -0.11817127466201782,
"max": 0.755702018737793,
"count": 50
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 174.8233184814453,
"min": -28.006591796875,
"max": 222.93209838867188,
"count": 50
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.004071508068591356,
"min": -0.011213291436433792,
"max": 0.4749000370502472,
"count": 50
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -1.148165225982666,
"min": -3.251854419708252,
"max": 112.55130767822266,
"count": 50
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06842014983424223,
"min": 0.06512405517914936,
"max": 0.07405228710408195,
"count": 50
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0263022475136334,
"min": 0.5183660097285736,
"max": 1.094291228791311,
"count": 50
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013262247259491215,
"min": 0.0007009630764837433,
"max": 0.0167980060425526,
"count": 50
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.19893370889236822,
"min": 0.009813483070772405,
"max": 0.25183738928171806,
"count": 50
},
"Pyramids.Policy.LearningRate.mean": {
"value": 2.99384566875111e-06,
"min": 2.99384566875111e-06,
"max": 0.00029676708679192377,
"count": 50
},
"Pyramids.Policy.LearningRate.sum": {
"value": 4.490768503126665e-05,
"min": 4.490768503126665e-05,
"max": 0.0038225965258012,
"count": 50
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10099791555555555,
"min": 0.10099791555555555,
"max": 0.19892236190476195,
"count": 50
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5149687333333333,
"min": 1.3924565333333336,
"max": 2.6741988000000005,
"count": 50
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00010969176399999997,
"min": 0.00010969176399999997,
"max": 0.009892343954285714,
"count": 50
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0016453764599999995,
"min": 0.0016453764599999995,
"max": 0.12743246012000004,
"count": 50
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.0068207415752112865,
"min": 0.0068207415752112865,
"max": 0.5988480448722839,
"count": 50
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.10231112688779831,
"min": 0.09818366169929504,
"max": 4.191936492919922,
"count": 50
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 303.83838383838383,
"min": 246.30578512396696,
"max": 998.4516129032259,
"count": 50
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30080.0,
"min": 16849.0,
"max": 33861.0,
"count": 50
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.5749110948256773,
"min": -0.9348839216174618,
"max": 1.7471680546007236,
"count": 50
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 155.91619838774204,
"min": -29.83880167454481,
"max": 207.91299849748611,
"count": 50
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.5749110948256773,
"min": -0.9348839216174618,
"max": 1.7471680546007236,
"count": 50
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 155.91619838774204,
"min": -29.83880167454481,
"max": 207.91299849748611,
"count": 50
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.021247621477790874,
"min": 0.018652499381217543,
"max": 12.110329354510588,
"count": 50
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.1035145263012964,
"min": 2.1035145263012964,
"max": 205.87559902668,
"count": 50
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676983073",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1676986541"
},
"total": 3468.108217849,
"count": 1,
"self": 1.223811310000201,
"children": {
"run_training.setup": {
"total": 0.18848909200005437,
"count": 1,
"self": 0.18848909200005437
},
"TrainerController.start_learning": {
"total": 3466.695917447,
"count": 1,
"self": 1.9729478351673606,
"children": {
"TrainerController._reset_env": {
"total": 7.14927036500012,
"count": 1,
"self": 7.14927036500012
},
"TrainerController.advance": {
"total": 3457.439610180833,
"count": 96473,
"self": 2.0655312508752104,
"children": {
"env_step": {
"total": 2348.815149980917,
"count": 96473,
"self": 2181.891144852954,
"children": {
"SubprocessEnvManager._take_step": {
"total": 165.71798869398935,
"count": 96473,
"self": 6.682191134097138,
"children": {
"TorchPolicy.evaluate": {
"total": 159.03579755989222,
"count": 93808,
"self": 53.65109205200042,
"children": {
"TorchPolicy.sample_actions": {
"total": 105.3847055078918,
"count": 93808,
"self": 105.3847055078918
}
}
}
}
},
"workers": {
"total": 1.206016433973673,
"count": 96473,
"self": 0.0,
"children": {
"worker_root": {
"total": 3459.416825108021,
"count": 96473,
"is_parallel": true,
"self": 1446.363935792926,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.002489697999862983,
"count": 1,
"is_parallel": true,
"self": 0.0008128120002766082,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001676885999586375,
"count": 8,
"is_parallel": true,
"self": 0.001676885999586375
}
}
},
"UnityEnvironment.step": {
"total": 0.04930837400024757,
"count": 1,
"is_parallel": true,
"self": 0.0005442950000542623,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004998600002181774,
"count": 1,
"is_parallel": true,
"self": 0.0004998600002181774
},
"communicator.exchange": {
"total": 0.046651129000110814,
"count": 1,
"is_parallel": true,
"self": 0.046651129000110814
},
"steps_from_proto": {
"total": 0.0016130899998643144,
"count": 1,
"is_parallel": true,
"self": 0.00044049999905837467,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011725900008059398,
"count": 8,
"is_parallel": true,
"self": 0.0011725900008059398
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2013.052889315095,
"count": 96472,
"is_parallel": true,
"self": 46.66657504624209,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 33.57954540988885,
"count": 96472,
"is_parallel": true,
"self": 33.57954540988885
},
"communicator.exchange": {
"total": 1792.7428356229716,
"count": 96472,
"is_parallel": true,
"self": 1792.7428356229716
},
"steps_from_proto": {
"total": 140.0639332359924,
"count": 96472,
"is_parallel": true,
"self": 32.886443789318946,
"children": {
"_process_rank_one_or_two_observation": {
"total": 107.17748944667346,
"count": 771776,
"is_parallel": true,
"self": 107.17748944667346
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1106.5589289490408,
"count": 96473,
"self": 3.936769507037752,
"children": {
"process_trajectory": {
"total": 240.19422243701501,
"count": 96473,
"self": 239.87085451401526,
"children": {
"RLTrainer._checkpoint": {
"total": 0.3233679229997506,
"count": 3,
"self": 0.3233679229997506
}
}
},
"_update_policy": {
"total": 862.427937004988,
"count": 692,
"self": 329.95906781097483,
"children": {
"TorchPPOOptimizer.update": {
"total": 532.4688691940132,
"count": 34197,
"self": 532.4688691940132
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.3489998309523799e-06,
"count": 1,
"self": 1.3489998309523799e-06
},
"TrainerController._save_models": {
"total": 0.13408771699960198,
"count": 1,
"self": 0.0019036319999941043,
"children": {
"RLTrainer._checkpoint": {
"total": 0.13218408499960788,
"count": 1,
"self": 0.13218408499960788
}
}
}
}
}
}
}