ppo-PyramidsRND1 / run_logs /timers.json
wooihen's picture
First Pyramids RND Training
fc1928f
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.6705056428909302,
"min": 0.6705056428909302,
"max": 1.4928065538406372,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 20061.529296875,
"min": 20061.529296875,
"max": 45285.78125,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989994.0,
"min": 29952.0,
"max": 989994.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989994.0,
"min": 29952.0,
"max": 989994.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.4127757251262665,
"min": -0.1291954070329666,
"max": 0.45354288816452026,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 109.38556671142578,
"min": -31.394485473632812,
"max": 120.1888656616211,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.015695733949542046,
"min": -0.013651584275066853,
"max": 0.24359925091266632,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 4.159369468688965,
"min": -3.5084571838378906,
"max": 59.194618225097656,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06404971038171511,
"min": 0.06266068842905084,
"max": 0.073523204553633,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.8966959453440115,
"min": 0.48501751614913835,
"max": 1.077593795101469,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013097452762199932,
"min": 0.0003512503751461677,
"max": 0.013097452762199932,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.18336433867079904,
"min": 0.004917505252046348,
"max": 0.18336433867079904,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.542290343078569e-06,
"min": 7.542290343078569e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010559206480309997,
"min": 0.00010559206480309997,
"max": 0.0037600615466461995,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10251406428571429,
"min": 0.10251406428571429,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4351969,
"min": 1.3886848,
"max": 2.6533538000000005,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.00026115502214285705,
"min": 0.00026115502214285705,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.003656170309999999,
"min": 0.003656170309999999,
"max": 0.12535004462000002,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.009108264930546284,
"min": 0.008803884498775005,
"max": 0.34607669711112976,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.12751570343971252,
"min": 0.12325438112020493,
"max": 2.422536849975586,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 456.5625,
"min": 440.92537313432837,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29220.0,
"min": 15984.0,
"max": 33329.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.3512285479477473,
"min": -1.0000000521540642,
"max": 1.4502521476883818,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 85.12739852070808,
"min": -31.99200163781643,
"max": 100.06739819049835,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.3512285479477473,
"min": -1.0000000521540642,
"max": 1.4502521476883818,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 85.12739852070808,
"min": -31.99200163781643,
"max": 100.06739819049835,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.043453433676895366,
"min": 0.0409107589694574,
"max": 6.2652354864403605,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.737566321644408,
"min": 2.5058543229242787,
"max": 100.24376778304577,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1674568113",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1674569995"
},
"total": 1882.1245052600002,
"count": 1,
"self": 0.42482009200011817,
"children": {
"run_training.setup": {
"total": 0.104002027000206,
"count": 1,
"self": 0.104002027000206
},
"TrainerController.start_learning": {
"total": 1881.5956831409999,
"count": 1,
"self": 1.145293100094932,
"children": {
"TrainerController._reset_env": {
"total": 5.899422795000191,
"count": 1,
"self": 5.899422795000191
},
"TrainerController.advance": {
"total": 1874.4603272479062,
"count": 63586,
"self": 1.152203549891965,
"children": {
"env_step": {
"total": 1231.553232520012,
"count": 63586,
"self": 1128.877568749007,
"children": {
"SubprocessEnvManager._take_step": {
"total": 101.97729908797828,
"count": 63586,
"self": 4.147804159810676,
"children": {
"TorchPolicy.evaluate": {
"total": 97.8294949281676,
"count": 62571,
"self": 33.013349410192404,
"children": {
"TorchPolicy.sample_actions": {
"total": 64.8161455179752,
"count": 62571,
"self": 64.8161455179752
}
}
}
}
},
"workers": {
"total": 0.6983646830267389,
"count": 63586,
"self": 0.0,
"children": {
"worker_root": {
"total": 1878.2604549629532,
"count": 63586,
"is_parallel": true,
"self": 839.9675376879495,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0017214280001098814,
"count": 1,
"is_parallel": true,
"self": 0.0006271379997997428,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0010942900003101386,
"count": 8,
"is_parallel": true,
"self": 0.0010942900003101386
}
}
},
"UnityEnvironment.step": {
"total": 0.06349345499984338,
"count": 1,
"is_parallel": true,
"self": 0.0004551489996629243,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0006826059998275014,
"count": 1,
"is_parallel": true,
"self": 0.0006826059998275014
},
"communicator.exchange": {
"total": 0.06079205000014554,
"count": 1,
"is_parallel": true,
"self": 0.06079205000014554
},
"steps_from_proto": {
"total": 0.0015636500002074172,
"count": 1,
"is_parallel": true,
"self": 0.00039820399979362264,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011654460004137945,
"count": 8,
"is_parallel": true,
"self": 0.0011654460004137945
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1038.2929172750037,
"count": 63585,
"is_parallel": true,
"self": 26.2754179409244,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 23.827665370999966,
"count": 63585,
"is_parallel": true,
"self": 23.827665370999966
},
"communicator.exchange": {
"total": 892.5034664950003,
"count": 63585,
"is_parallel": true,
"self": 892.5034664950003
},
"steps_from_proto": {
"total": 95.68636746807897,
"count": 63585,
"is_parallel": true,
"self": 20.937063893118193,
"children": {
"_process_rank_one_or_two_observation": {
"total": 74.74930357496078,
"count": 508680,
"is_parallel": true,
"self": 74.74930357496078
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 641.7548911780023,
"count": 63586,
"self": 2.150415970015729,
"children": {
"process_trajectory": {
"total": 139.97571331998688,
"count": 63586,
"self": 139.79172760798792,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1839857119989574,
"count": 2,
"self": 0.1839857119989574
}
}
},
"_update_policy": {
"total": 499.62876188799964,
"count": 455,
"self": 178.9945851579846,
"children": {
"TorchPPOOptimizer.update": {
"total": 320.63417673001504,
"count": 22746,
"self": 320.63417673001504
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.279992809752002e-07,
"count": 1,
"self": 9.279992809752002e-07
},
"TrainerController._save_models": {
"total": 0.09063906999926985,
"count": 1,
"self": 0.001395679998495325,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08924339000077453,
"count": 1,
"self": 0.08924339000077453
}
}
}
}
}
}
}