{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.44343188405036926,
"min": 0.4422524571418762,
"max": 1.4796161651611328,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 13288.7666015625,
"min": 13288.7666015625,
"max": 44885.63671875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989886.0,
"min": 29952.0,
"max": 989886.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989886.0,
"min": 29952.0,
"max": 989886.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.555169403553009,
"min": -0.07640481740236282,
"max": 0.6166432499885559,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 153.78192138671875,
"min": -18.41356086730957,
"max": 172.04347229003906,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.003325456753373146,
"min": 0.003318789880722761,
"max": 0.37097224593162537,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 0.9211515188217163,
"min": 0.8562477827072144,
"max": 89.77528381347656,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.0668359316824492,
"min": 0.06574777701396935,
"max": 0.07347759801373654,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9357030435542887,
"min": 0.49014574319362403,
"max": 1.0613277813536115,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01786354147216549,
"min": 0.00043779097343768253,
"max": 0.018623049995943143,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.2500895806103169,
"min": 0.006129073628127556,
"max": 0.260722699943204,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.743268847514289e-06,
"min": 7.743268847514289e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010840576386520005,
"min": 0.00010840576386520005,
"max": 0.003758353947215399,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10258105714285715,
"min": 0.10258105714285715,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4361348,
"min": 1.3886848,
"max": 2.6527846,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00026784760857142864,
"min": 0.00026784760857142864,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.003749866520000001,
"min": 0.003749866520000001,
"max": 0.12529318154,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.013201264664530754,
"min": 0.013201264664530754,
"max": 0.6614483594894409,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.18481770157814026,
"min": 0.18481770157814026,
"max": 4.630138397216797,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 328.3723404255319,
"min": 301.5050505050505,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30867.0,
"min": 15984.0,
"max": 33429.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.6077744463022718,
"min": -1.0000000521540642,
"max": 1.6984949309115458,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 151.13079795241356,
"min": -31.994001656770706,
"max": 168.15099816024303,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.6077744463022718,
"min": -1.0000000521540642,
"max": 1.6984949309115458,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 151.13079795241356,
"min": -31.994001656770706,
"max": 168.15099816024303,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.04497627533992079,
"min": 0.04294012762392128,
"max": 13.38169926777482,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 4.227769881952554,
"min": 4.147454440855654,
"max": 214.10718828439713,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1735310534",
"python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.5.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1735313385"
},
"total": 2851.58034099,
"count": 1,
"self": 0.9401706570001807,
"children": {
"run_training.setup": {
"total": 0.06219426799998473,
"count": 1,
"self": 0.06219426799998473
},
"TrainerController.start_learning": {
"total": 2850.577976065,
"count": 1,
"self": 2.1806132560623155,
"children": {
"TrainerController._reset_env": {
"total": 5.320990028999859,
"count": 1,
"self": 5.320990028999859
},
"TrainerController.advance": {
"total": 2842.9209085529383,
"count": 64096,
"self": 2.211332235905502,
"children": {
"env_step": {
"total": 2046.3581528619643,
"count": 64096,
"self": 1843.4711954629634,
"children": {
"SubprocessEnvManager._take_step": {
"total": 201.61193134709902,
"count": 64096,
"self": 6.166418657134045,
"children": {
"TorchPolicy.evaluate": {
"total": 195.44551268996497,
"count": 62571,
"self": 195.44551268996497
}
}
},
"workers": {
"total": 1.275026051901932,
"count": 64096,
"self": 0.0,
"children": {
"worker_root": {
"total": 2842.967168274056,
"count": 64096,
"is_parallel": true,
"self": 1160.7467036160197,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0021826160000273376,
"count": 1,
"is_parallel": true,
"self": 0.0007506369997827278,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014319790002446098,
"count": 8,
"is_parallel": true,
"self": 0.0014319790002446098
}
}
},
"UnityEnvironment.step": {
"total": 0.05516294500012009,
"count": 1,
"is_parallel": true,
"self": 0.0006724070003656379,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005204669998875033,
"count": 1,
"is_parallel": true,
"self": 0.0005204669998875033
},
"communicator.exchange": {
"total": 0.05203671799995391,
"count": 1,
"is_parallel": true,
"self": 0.05203671799995391
},
"steps_from_proto": {
"total": 0.0019333529999130405,
"count": 1,
"is_parallel": true,
"self": 0.00042569499964884017,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0015076580002642004,
"count": 8,
"is_parallel": true,
"self": 0.0015076580002642004
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1682.2204646580362,
"count": 64095,
"is_parallel": true,
"self": 43.855330336063844,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 27.887135275969513,
"count": 64095,
"is_parallel": true,
"self": 27.887135275969513
},
"communicator.exchange": {
"total": 1486.7428550810037,
"count": 64095,
"is_parallel": true,
"self": 1486.7428550810037
},
"steps_from_proto": {
"total": 123.73514396499922,
"count": 64095,
"is_parallel": true,
"self": 26.864576416306818,
"children": {
"_process_rank_one_or_two_observation": {
"total": 96.8705675486924,
"count": 512760,
"is_parallel": true,
"self": 96.8705675486924
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 794.3514234550685,
"count": 64096,
"self": 4.014514773070232,
"children": {
"process_trajectory": {
"total": 163.35734097599834,
"count": 64096,
"self": 163.0260445359986,
"children": {
"RLTrainer._checkpoint": {
"total": 0.3312964399997327,
"count": 2,
"self": 0.3312964399997327
}
}
},
"_update_policy": {
"total": 626.9795677059999,
"count": 458,
"self": 351.5291690409997,
"children": {
"TorchPPOOptimizer.update": {
"total": 275.4503986650002,
"count": 22767,
"self": 275.4503986650002
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.880000127130188e-06,
"count": 1,
"self": 1.880000127130188e-06
},
"TrainerController._save_models": {
"total": 0.15546234699922934,
"count": 1,
"self": 0.0025254949987356667,
"children": {
"RLTrainer._checkpoint": {
"total": 0.15293685200049367,
"count": 1,
"self": 0.15293685200049367
}
}
}
}
}
}
}