First training of PyramidsRND

e46304e verified over 1 year ago

18.7 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.327570378780365,
	"min": 0.32489824295043945,
	"max": 1.3800469636917114,
	"count": 40
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 9769.458984375,
	"min": 9705.3603515625,
	"max": 41865.10546875,
	"count": 40
	},
	"Pyramids.Step.mean": {
	"value": 1199889.0,
	"min": 29900.0,
	"max": 1199889.0,
	"count": 40
	},
	"Pyramids.Step.sum": {
	"value": 1199889.0,
	"min": 29900.0,
	"max": 1199889.0,
	"count": 40
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.48751309514045715,
	"min": -0.1173413023352623,
	"max": 0.6807485222816467,
	"count": 40
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 132.6035614013672,
	"min": -27.80988883972168,
	"max": 195.5826416015625,
	"count": 40
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 0.04021748527884483,
	"min": 0.008450839668512344,
	"max": 0.5263012647628784,
	"count": 40
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 10.939155578613281,
	"min": 2.3239808082580566,
	"max": 124.7333984375,
	"count": 40
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.06841986396743562,
	"min": 0.06370052399454705,
	"max": 0.07400130381358816,
	"count": 40
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 0.9578780955440986,
	"min": 0.5168710557316382,
	"max": 1.0584646375432119,
	"count": 40
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 0.015359580670649717,
	"min": 0.0014043163802439597,
	"max": 0.01773342532743256,
	"count": 40
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 0.21503412938909605,
	"min": 0.016283816297613572,
	"max": 0.2558419337446514,
	"count": 40
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 3.742705895321429e-06,
	"min": 3.742705895321429e-06,
	"max": 0.00029602928703785714,
	"count": 40
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 5.239788253450001e-05,
	"min": 5.239788253450001e-05,
	"max": 0.003464327645224166,
	"count": 40
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10124753571428571,
	"min": 0.10124753571428571,
	"max": 0.19867642857142861,
	"count": 40
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.4174655,
	"min": 1.3907350000000003,
	"max": 2.5382194166666667,
	"count": 40
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 0.00013462881785714288,
	"min": 0.00013462881785714288,
	"max": 0.009867775214285711,
	"count": 40
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.0018848034500000004,
	"min": 0.0018848034500000004,
	"max": 0.11549210575000002,
	"count": 40
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.01142031792551279,
	"min": 0.01142031792551279,
	"max": 0.46852198243141174,
	"count": 40
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.15988445281982422,
	"min": 0.15988445281982422,
	"max": 3.279653787612915,
	"count": 40
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 359.7402597402597,
	"min": 278.3883495145631,
	"max": 997.65625,
	"count": 40
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 27700.0,
	"min": 16571.0,
	"max": 33398.0,
	"count": 40
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.536332450613573,
	"min": -0.8735438012517989,
	"max": 1.7021863955317191,
	"count": 40
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 118.29759869724512,
	"min": -28.10320170968771,
	"max": 175.32519873976707,
	"count": 40
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.536332450613573,
	"min": -0.8735438012517989,
	"max": 1.7021863955317191,
	"count": 40
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 118.29759869724512,
	"min": -28.10320170968771,
	"max": 175.32519873976707,
	"count": 40
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.0428759866415269,
	"min": 0.03539874870160559,
	"max": 8.11188198976657,
	"count": 40
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 3.3014509713975713,
	"min": 3.3014509713975713,
	"max": 137.90199382603168,
	"count": 40
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 40
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 40
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1722239453",
	"python_version": "3.10.12 (main, Mar 22 2024, 16:50:05) [GCC 11.4.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
	"mlagents_version": "1.1.0.dev0",
	"mlagents_envs_version": "1.1.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.3.1+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1722242161"
	},
	"total": 2708.339048912,
	"count": 1,
	"self": 0.49355574600031105,
	"children": {
	"run_training.setup": {
	"total": 0.061652654999988954,
	"count": 1,
	"self": 0.061652654999988954
	},
	"TrainerController.start_learning": {
	"total": 2707.783840511,
	"count": 1,
	"self": 1.6968191580212988,
	"children": {
	"TrainerController._reset_env": {
	"total": 2.8411980309999763,
	"count": 1,
	"self": 2.8411980309999763
	},
	"TrainerController.advance": {
	"total": 2703.156866306979,
	"count": 76786,
	"self": 1.779406122981527,
	"children": {
	"env_step": {
	"total": 1926.3665606120298,
	"count": 76786,
	"self": 1763.6004900300732,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 161.73515855100368,
	"count": 76786,
	"self": 5.812495826005716,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 155.92266272499796,
	"count": 75050,
	"self": 155.92266272499796
	}
	}
	},
	"workers": {
	"total": 1.0309120309530044,
	"count": 76786,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 2701.530333084018,
	"count": 76786,
	"is_parallel": true,
	"self": 1086.1473075450244,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.005034563999970487,
	"count": 1,
	"is_parallel": true,
	"self": 0.0034628719998863744,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0015716920000841128,
	"count": 8,
	"is_parallel": true,
	"self": 0.0015716920000841128
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.048841078000009475,
	"count": 1,
	"is_parallel": true,
	"self": 0.0006621390000418614,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00046770699998432974,
	"count": 1,
	"is_parallel": true,
	"self": 0.00046770699998432974
	},
	"communicator.exchange": {
	"total": 0.0460306090000131,
	"count": 1,
	"is_parallel": true,
	"self": 0.0460306090000131
	},
	"steps_from_proto": {
	"total": 0.0016806229999701827,
	"count": 1,
	"is_parallel": true,
	"self": 0.0003793310000901329,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0013012919998800498,
	"count": 8,
	"is_parallel": true,
	"self": 0.0013012919998800498
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1615.3830255389935,
	"count": 76785,
	"is_parallel": true,
	"self": 41.08903519380624,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 28.991106889026923,
	"count": 76785,
	"is_parallel": true,
	"self": 28.991106889026923
	},
	"communicator.exchange": {
	"total": 1424.2228821570955,
	"count": 76785,
	"is_parallel": true,
	"self": 1424.2228821570955
	},
	"steps_from_proto": {
	"total": 121.08000129906486,
	"count": 76785,
	"is_parallel": true,
	"self": 24.951559190169746,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 96.12844210889511,
	"count": 614280,
	"is_parallel": true,
	"self": 96.12844210889511
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 775.0108995719673,
	"count": 76786,
	"self": 3.33737299992265,
	"children": {
	"process_trajectory": {
	"total": 158.6718370720415,
	"count": 76786,
	"self": 158.31062389904162,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.3612131729998964,
	"count": 2,
	"self": 0.3612131729998964
	}
	}
	},
	"_update_policy": {
	"total": 613.0016895000032,
	"count": 546,
	"self": 363.2883043819959,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 249.7133851180073,
	"count": 27381,
	"self": 249.7133851180073
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 1.0660000953066628e-06,
	"count": 1,
	"self": 1.0660000953066628e-06
	},
	"TrainerController._save_models": {
	"total": 0.08895594900013748,
	"count": 1,
	"self": 0.0016204910002670658,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.08733545799987041,
	"count": 1,
	"self": 0.08733545799987041
	}
	}
	}
	}
	}
	}
	}