Push ppo-PyramidsRND

535083e verified over 1 year ago

18.8 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.18535521626472473,
	"min": 0.17403636872768402,
	"max": 1.4797165393829346,
	"count": 100
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 5590.3134765625,
	"min": 5249.81591796875,
	"max": 44888.6796875,
	"count": 100
	},
	"Pyramids.Step.mean": {
	"value": 2999887.0,
	"min": 29922.0,
	"max": 2999887.0,
	"count": 100
	},
	"Pyramids.Step.sum": {
	"value": 2999887.0,
	"min": 29922.0,
	"max": 2999887.0,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.8328748345375061,
	"min": -0.1038481667637825,
	"max": 0.9149202704429626,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 247.36383056640625,
	"min": -25.131256103515625,
	"max": 287.28497314453125,
	"count": 100
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 0.014337725937366486,
	"min": -0.06541164219379425,
	"max": 0.35815566778182983,
	"count": 100
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 4.258304595947266,
	"min": -17.661144256591797,
	"max": 84.88289642333984,
	"count": 100
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.07103111432565473,
	"min": 0.06337840792613596,
	"max": 0.07297746869827175,
	"count": 100
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 1.065466714884821,
	"min": 0.5108422808879023,
	"max": 1.0730015888533957,
	"count": 100
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 0.013715189750636533,
	"min": 0.00026187829616438767,
	"max": 0.017055366827471437,
	"count": 100
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 0.205727846259548,
	"min": 0.003142539553972652,
	"max": 0.23877513558460015,
	"count": 100
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 1.519166160311104e-06,
	"min": 1.519166160311104e-06,
	"max": 0.0002984114005295333,
	"count": 100
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 2.278749240466656e-05,
	"min": 2.278749240466656e-05,
	"max": 0.0039695695768101665,
	"count": 100
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10050635555555557,
	"min": 0.10050635555555557,
	"max": 0.19947046666666668,
	"count": 100
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.5075953333333334,
	"min": 1.3962932666666668,
	"max": 2.8124718666666673,
	"count": 100
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 6.0584919999999754e-05,
	"min": 6.0584919999999754e-05,
	"max": 0.009947099619999998,
	"count": 100
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.0009087737999999964,
	"min": 0.0009087737999999964,
	"max": 0.13232666434999998,
	"count": 100
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.0063836174085736275,
	"min": 0.006180214695632458,
	"max": 0.3896730840206146,
	"count": 100
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.09575425833463669,
	"min": 0.08652300387620926,
	"max": 2.7277116775512695,
	"count": 100
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 221.85714285714286,
	"min": 196.81045751633988,
	"max": 999.0,
	"count": 100
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 27954.0,
	"min": 16721.0,
	"max": 32323.0,
	"count": 100
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.7465763624141535,
	"min": -0.999903277524056,
	"max": 1.7946824220990814,
	"count": 100
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 221.8151980265975,
	"min": -30.997001603245735,
	"max": 273.873198479414,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.7465763624141535,
	"min": -0.999903277524056,
	"max": 1.7946824220990814,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 221.8151980265975,
	"min": -30.997001603245735,
	"max": 273.873198479414,
	"count": 100
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.01449265299299269,
	"min": 0.013690494294619613,
	"max": 7.311247367192717,
	"count": 100
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 1.8405669301100716,
	"min": 1.8405669301100716,
	"max": 124.29120524227619,
	"count": 100
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 100
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 100
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1722240639",
	"python_version": "3.10.12 (main, Mar 22 2024, 16:50:05) [GCC 11.4.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
	"mlagents_version": "1.1.0.dev0",
	"mlagents_envs_version": "1.1.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.3.1+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1722245863"
	},
	"total": 5223.930010196001,
	"count": 1,
	"self": 0.3725842540006852,
	"children": {
	"run_training.setup": {
	"total": 0.05449650799994288,
	"count": 1,
	"self": 0.05449650799994288
	},
	"TrainerController.start_learning": {
	"total": 5223.502929434,
	"count": 1,
	"self": 4.22077262915991,
	"children": {
	"TrainerController._reset_env": {
	"total": 2.0454575560000876,
	"count": 1,
	"self": 2.0454575560000876
	},
	"TrainerController.advance": {
	"total": 5217.149424316841,
	"count": 195608,
	"self": 4.059529022582865,
	"children": {
	"env_step": {
	"total": 3460.5996767411093,
	"count": 195608,
	"self": 3082.0289851512716,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 375.96389581699225,
	"count": 195608,
	"self": 13.558149060980782,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 362.40574675601147,
	"count": 187559,
	"self": 362.40574675601147
	}
	}
	},
	"workers": {
	"total": 2.6067957728456577,
	"count": 195608,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 5216.689421653077,
	"count": 195608,
	"is_parallel": true,
	"self": 2441.499131032131,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0020848729999443094,
	"count": 1,
	"is_parallel": true,
	"self": 0.000647403999778362,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0014374690001659474,
	"count": 8,
	"is_parallel": true,
	"self": 0.0014374690001659474
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.03822499900002185,
	"count": 1,
	"is_parallel": true,
	"self": 0.00044477200003711914,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00033218499993381556,
	"count": 1,
	"is_parallel": true,
	"self": 0.00033218499993381556
	},
	"communicator.exchange": {
	"total": 0.03629049100004522,
	"count": 1,
	"is_parallel": true,
	"self": 0.03629049100004522
	},
	"steps_from_proto": {
	"total": 0.0011575510000056966,
	"count": 1,
	"is_parallel": true,
	"self": 0.00026954099996601144,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0008880100000396851,
	"count": 8,
	"is_parallel": true,
	"self": 0.0008880100000396851
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 2775.190290620946,
	"count": 195607,
	"is_parallel": true,
	"self": 68.2026290299359,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 42.8606724801208,
	"count": 195607,
	"is_parallel": true,
	"self": 42.8606724801208
	},
	"communicator.exchange": {
	"total": 2470.3606953088242,
	"count": 195607,
	"is_parallel": true,
	"self": 2470.3606953088242
	},
	"steps_from_proto": {
	"total": 193.76629380206532,
	"count": 195607,
	"is_parallel": true,
	"self": 42.12759774883557,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 151.63869605322975,
	"count": 1564856,
	"is_parallel": true,
	"self": 151.63869605322975
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 1752.4902185531487,
	"count": 195608,
	"self": 8.561444220089697,
	"children": {
	"process_trajectory": {
	"total": 359.38304962905727,
	"count": 195608,
	"self": 358.8474447750573,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.5356048539999847,
	"count": 6,
	"self": 0.5356048539999847
	}
	}
	},
	"_update_policy": {
	"total": 1384.5457247040017,
	"count": 1402,
	"self": 820.6991755719698,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 563.8465491320319,
	"count": 68394,
	"self": 563.8465491320319
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.1339998309267685e-06,
	"count": 1,
	"self": 1.1339998309267685e-06
	},
	"TrainerController._save_models": {
	"total": 0.08727379799984192,
	"count": 1,
	"self": 0.0015812700003152713,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.08569252799952665,
	"count": 1,
	"self": 0.08569252799952665
	}
	}
	}
	}
	}
	}
	}