cfmy's picture
First Pyramids
a07330c
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.5697529315948486,
"min": 0.5697529315948486,
"max": 1.4282350540161133,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 17083.47265625,
"min": 17083.47265625,
"max": 43326.9375,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989958.0,
"min": 29952.0,
"max": 989958.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989958.0,
"min": 29952.0,
"max": 989958.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.027781447395682335,
"min": -0.1567494124174118,
"max": 0.10649461299180984,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 6.806454658508301,
"min": -37.14961242675781,
"max": 26.410663604736328,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.09998150169849396,
"min": -0.10298889875411987,
"max": 0.6416728496551514,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -24.495468139648438,
"min": -25.54124641418457,
"max": 152.0764617919922,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06580670452386897,
"min": 0.06420853877676838,
"max": 0.07175141374133338,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9212938633341656,
"min": 0.48710751419233944,
"max": 1.0241472059318695,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01111924657790533,
"min": 0.0004095529458446011,
"max": 0.014578841951562237,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.15566945209067462,
"min": 0.005324188295979814,
"max": 0.20122647359237497,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.454518943764286e-06,
"min": 7.454518943764286e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0001043632652127,
"min": 0.0001043632652127,
"max": 0.0035073866308712003,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10248480714285713,
"min": 0.10248480714285713,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4347872999999998,
"min": 1.3886848,
"max": 2.5691288000000005,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002582322335714286,
"min": 0.0002582322335714286,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0036152512700000005,
"min": 0.0036152512700000005,
"max": 0.11693596712000001,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.017260180786252022,
"min": 0.01671287976205349,
"max": 0.701418399810791,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.2416425347328186,
"min": 0.23398031294345856,
"max": 4.909928798675537,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 786.2162162162163,
"min": 745.8947368421053,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29090.0,
"min": 15984.0,
"max": 33200.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 0.04995785339882499,
"min": -1.0000000521540642,
"max": 0.3417729368886432,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 1.8983984291553497,
"min": -30.993601590394974,
"max": 12.645598664879799,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 0.04995785339882499,
"min": -1.0000000521540642,
"max": 0.3417729368886432,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 1.8983984291553497,
"min": -30.993601590394974,
"max": 12.645598664879799,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.14105907934687198,
"min": 0.1294292282388621,
"max": 13.87622656673193,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 5.360245015181135,
"min": 4.788881444837898,
"max": 222.01962506771088,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1769141633",
"python_version": "3.8.20 (default, Sep 7 2024, 18:35:08) \n[GCC 11.4.0]",
"command_line_arguments": "/home/cfmy/rl/deep-rl-class-zh/.venv38/bin/mlagents-learn --force ./ml-agents/config/ppo/PyramidsRND.yaml --env=./trained-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids_Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.23.5",
"end_time_seconds": "1769143283"
},
"total": 1649.3432424259954,
"count": 1,
"self": 0.5271806520031532,
"children": {
"run_training.setup": {
"total": 0.023424899991368875,
"count": 1,
"self": 0.023424899991368875
},
"TrainerController.start_learning": {
"total": 1648.792636874001,
"count": 1,
"self": 1.3130523440631805,
"children": {
"TrainerController._reset_env": {
"total": 2.4656410420138855,
"count": 1,
"self": 2.4656410420138855
},
"TrainerController.advance": {
"total": 1644.9661176809168,
"count": 63242,
"self": 1.279258410053444,
"children": {
"env_step": {
"total": 1170.0897326112463,
"count": 63242,
"self": 1097.9548885850818,
"children": {
"SubprocessEnvManager._take_step": {
"total": 71.33310817070014,
"count": 63242,
"self": 3.0629787500802195,
"children": {
"TorchPolicy.evaluate": {
"total": 68.27012942061992,
"count": 62566,
"self": 14.928763125877595,
"children": {
"TorchPolicy.sample_actions": {
"total": 53.34136629474233,
"count": 62566,
"self": 53.34136629474233
}
}
}
}
},
"workers": {
"total": 0.8017358554643579,
"count": 63242,
"self": 0.0,
"children": {
"worker_root": {
"total": 1645.669839021968,
"count": 63242,
"is_parallel": true,
"self": 628.1513156488363,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0018416250095469877,
"count": 1,
"is_parallel": true,
"self": 0.0006031470111338422,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012384779984131455,
"count": 8,
"is_parallel": true,
"self": 0.0012384779984131455
}
}
},
"UnityEnvironment.step": {
"total": 0.03076878099818714,
"count": 1,
"is_parallel": true,
"self": 0.00022137799533084035,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0003743050037883222,
"count": 1,
"is_parallel": true,
"self": 0.0003743050037883222
},
"communicator.exchange": {
"total": 0.029574961998150684,
"count": 1,
"is_parallel": true,
"self": 0.029574961998150684
},
"steps_from_proto": {
"total": 0.0005981360009172931,
"count": 1,
"is_parallel": true,
"self": 0.00016027498350013047,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00043786101741716266,
"count": 8,
"is_parallel": true,
"self": 0.00043786101741716266
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1017.5185233731318,
"count": 63241,
"is_parallel": true,
"self": 14.65468421546393,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 9.33519983770384,
"count": 63241,
"is_parallel": true,
"self": 9.33519983770384
},
"communicator.exchange": {
"total": 955.8037445731752,
"count": 63241,
"is_parallel": true,
"self": 955.8037445731752
},
"steps_from_proto": {
"total": 37.72489474678878,
"count": 63241,
"is_parallel": true,
"self": 10.440495088492753,
"children": {
"_process_rank_one_or_two_observation": {
"total": 27.284399658296024,
"count": 505928,
"is_parallel": true,
"self": 27.284399658296024
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 473.59712665961706,
"count": 63242,
"self": 2.243827484344365,
"children": {
"process_trajectory": {
"total": 91.52122798137134,
"count": 63242,
"self": 91.40228566036967,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11894232100166846,
"count": 2,
"self": 0.11894232100166846
}
}
},
"_update_policy": {
"total": 379.83207119390136,
"count": 447,
"self": 110.84842566719453,
"children": {
"TorchPPOOptimizer.update": {
"total": 268.9836455267068,
"count": 22779,
"self": 268.9836455267068
}
}
}
}
}
}
},
"trainer_threads": {
"total": 6.990012479946017e-07,
"count": 1,
"self": 6.990012479946017e-07
},
"TrainerController._save_models": {
"total": 0.04782510800578166,
"count": 1,
"self": 0.0008432119939243421,
"children": {
"RLTrainer._checkpoint": {
"total": 0.046981896011857316,
"count": 1,
"self": 0.046981896011857316
}
}
}
}
}
}
}