First Pyramids RND Training

fc1928f about 3 years ago

19.2 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.6705056428909302,
	"min": 0.6705056428909302,
	"max": 1.4928065538406372,
	"count": 33
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 20061.529296875,
	"min": 20061.529296875,
	"max": 45285.78125,
	"count": 33
	},
	"Pyramids.Step.mean": {
	"value": 989994.0,
	"min": 29952.0,
	"max": 989994.0,
	"count": 33
	},
	"Pyramids.Step.sum": {
	"value": 989994.0,
	"min": 29952.0,
	"max": 989994.0,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.4127757251262665,
	"min": -0.1291954070329666,
	"max": 0.45354288816452026,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 109.38556671142578,
	"min": -31.394485473632812,
	"max": 120.1888656616211,
	"count": 33
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 0.015695733949542046,
	"min": -0.013651584275066853,
	"max": 0.24359925091266632,
	"count": 33
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 4.159369468688965,
	"min": -3.5084571838378906,
	"max": 59.194618225097656,
	"count": 33
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.06404971038171511,
	"min": 0.06266068842905084,
	"max": 0.073523204553633,
	"count": 33
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 0.8966959453440115,
	"min": 0.48501751614913835,
	"max": 1.077593795101469,
	"count": 33
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 0.013097452762199932,
	"min": 0.0003512503751461677,
	"max": 0.013097452762199932,
	"count": 33
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 0.18336433867079904,
	"min": 0.004917505252046348,
	"max": 0.18336433867079904,
	"count": 33
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 7.542290343078569e-06,
	"min": 7.542290343078569e-06,
	"max": 0.00029515063018788575,
	"count": 33
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 0.00010559206480309997,
	"min": 0.00010559206480309997,
	"max": 0.0037600615466461995,
	"count": 33
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10251406428571429,
	"min": 0.10251406428571429,
	"max": 0.19838354285714285,
	"count": 33
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.4351969,
	"min": 1.3886848,
	"max": 2.6533538000000005,
	"count": 33
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 0.00026115502214285705,
	"min": 0.00026115502214285705,
	"max": 0.00983851593142857,
	"count": 33
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.003656170309999999,
	"min": 0.003656170309999999,
	"max": 0.12535004462000002,
	"count": 33
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.009108264930546284,
	"min": 0.008803884498775005,
	"max": 0.34607669711112976,
	"count": 33
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.12751570343971252,
	"min": 0.12325438112020493,
	"max": 2.422536849975586,
	"count": 33
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 456.5625,
	"min": 440.92537313432837,
	"max": 999.0,
	"count": 33
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 29220.0,
	"min": 15984.0,
	"max": 33329.0,
	"count": 33
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.3512285479477473,
	"min": -1.0000000521540642,
	"max": 1.4502521476883818,
	"count": 33
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 85.12739852070808,
	"min": -31.99200163781643,
	"max": 100.06739819049835,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.3512285479477473,
	"min": -1.0000000521540642,
	"max": 1.4502521476883818,
	"count": 33
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 85.12739852070808,
	"min": -31.99200163781643,
	"max": 100.06739819049835,
	"count": 33
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.043453433676895366,
	"min": 0.0409107589694574,
	"max": 6.2652354864403605,
	"count": 33
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 2.737566321644408,
	"min": 2.5058543229242787,
	"max": 100.24376778304577,
	"count": 33
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 33
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 33
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1674568113",
	"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
	"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
	"mlagents_version": "0.29.0.dev0",
	"mlagents_envs_version": "0.29.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.8.1+cu102",
	"numpy_version": "1.21.6",
	"end_time_seconds": "1674569995"
	},
	"total": 1882.1245052600002,
	"count": 1,
	"self": 0.42482009200011817,
	"children": {
	"run_training.setup": {
	"total": 0.104002027000206,
	"count": 1,
	"self": 0.104002027000206
	},
	"TrainerController.start_learning": {
	"total": 1881.5956831409999,
	"count": 1,
	"self": 1.145293100094932,
	"children": {
	"TrainerController._reset_env": {
	"total": 5.899422795000191,
	"count": 1,
	"self": 5.899422795000191
	},
	"TrainerController.advance": {
	"total": 1874.4603272479062,
	"count": 63586,
	"self": 1.152203549891965,
	"children": {
	"env_step": {
	"total": 1231.553232520012,
	"count": 63586,
	"self": 1128.877568749007,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 101.97729908797828,
	"count": 63586,
	"self": 4.147804159810676,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 97.8294949281676,
	"count": 62571,
	"self": 33.013349410192404,
	"children": {
	"TorchPolicy.sample_actions": {
	"total": 64.8161455179752,
	"count": 62571,
	"self": 64.8161455179752
	}
	}
	}
	}
	},
	"workers": {
	"total": 0.6983646830267389,
	"count": 63586,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 1878.2604549629532,
	"count": 63586,
	"is_parallel": true,
	"self": 839.9675376879495,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0017214280001098814,
	"count": 1,
	"is_parallel": true,
	"self": 0.0006271379997997428,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0010942900003101386,
	"count": 8,
	"is_parallel": true,
	"self": 0.0010942900003101386
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.06349345499984338,
	"count": 1,
	"is_parallel": true,
	"self": 0.0004551489996629243,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.0006826059998275014,
	"count": 1,
	"is_parallel": true,
	"self": 0.0006826059998275014
	},
	"communicator.exchange": {
	"total": 0.06079205000014554,
	"count": 1,
	"is_parallel": true,
	"self": 0.06079205000014554
	},
	"steps_from_proto": {
	"total": 0.0015636500002074172,
	"count": 1,
	"is_parallel": true,
	"self": 0.00039820399979362264,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0011654460004137945,
	"count": 8,
	"is_parallel": true,
	"self": 0.0011654460004137945
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1038.2929172750037,
	"count": 63585,
	"is_parallel": true,
	"self": 26.2754179409244,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 23.827665370999966,
	"count": 63585,
	"is_parallel": true,
	"self": 23.827665370999966
	},
	"communicator.exchange": {
	"total": 892.5034664950003,
	"count": 63585,
	"is_parallel": true,
	"self": 892.5034664950003
	},
	"steps_from_proto": {
	"total": 95.68636746807897,
	"count": 63585,
	"is_parallel": true,
	"self": 20.937063893118193,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 74.74930357496078,
	"count": 508680,
	"is_parallel": true,
	"self": 74.74930357496078
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 641.7548911780023,
	"count": 63586,
	"self": 2.150415970015729,
	"children": {
	"process_trajectory": {
	"total": 139.97571331998688,
	"count": 63586,
	"self": 139.79172760798792,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.1839857119989574,
	"count": 2,
	"self": 0.1839857119989574
	}
	}
	},
	"_update_policy": {
	"total": 499.62876188799964,
	"count": 455,
	"self": 178.9945851579846,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 320.63417673001504,
	"count": 22746,
	"self": 320.63417673001504
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 9.279992809752002e-07,
	"count": 1,
	"self": 9.279992809752002e-07
	},
	"TrainerController._save_models": {
	"total": 0.09063906999926985,
	"count": 1,
	"self": 0.001395679998495325,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.08924339000077453,
	"count": 1,
	"self": 0.08924339000077453
	}
	}
	}
	}
	}
	}
	}