hungtrab's picture
Second Push
115bd18 verified
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.31941959261894226,
"min": 0.31941959261894226,
"max": 1.5913751125335693,
"count": 100
},
"Pyramids.Policy.Entropy.sum": {
"value": 9597.919921875,
"min": 9597.919921875,
"max": 48275.95703125,
"count": 100
},
"Pyramids.Step.mean": {
"value": 2999873.0,
"min": 29952.0,
"max": 2999873.0,
"count": 100
},
"Pyramids.Step.sum": {
"value": 2999873.0,
"min": 29952.0,
"max": 2999873.0,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.746780514717102,
"min": -0.1449412703514099,
"max": 0.8288553953170776,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 203.87107849121094,
"min": -34.93084716796875,
"max": 233.7372283935547,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.9310639500617981,
"min": 0.9151319265365601,
"max": 26.376020431518555,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 254.1804656982422,
"min": 252.17361450195312,
"max": 6356.62109375,
"count": 100
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.03764318283388598,
"min": 0.030007568025030195,
"max": 0.04330179998274344,
"count": 100
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.11292954850165793,
"min": 0.06378415991396955,
"max": 0.12503808459588506,
"count": 100
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.015089578854127062,
"min": 0.011777677055862216,
"max": 50.34082246057078,
"count": 100
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.04526873656238119,
"min": 0.025758922534684342,
"max": 100.68164492114155,
"count": 100
},
"Pyramids.Policy.LearningRate.mean": {
"value": 4.844772934000003e-07,
"min": 4.844772934000003e-07,
"max": 9.939200060800002e-05,
"count": 100
},
"Pyramids.Policy.LearningRate.sum": {
"value": 1.4534318802000007e-06,
"min": 1.4534318802000007e-06,
"max": 0.00029531847134820003,
"count": 100
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10048437777777779,
"min": 0.10048437777777779,
"max": 0.19939199999999999,
"count": 100
},
"Pyramids.Policy.Epsilon.sum": {
"value": 0.30145313333333335,
"min": 0.21301233333333333,
"max": 0.5953184666666668,
"count": 100
},
"Pyramids.Policy.Beta.mean": {
"value": 5.8389340000000017e-05,
"min": 5.8389340000000017e-05,
"max": 0.009939260800000002,
"count": 100
},
"Pyramids.Policy.Beta.sum": {
"value": 0.00017516802000000004,
"min": 0.00017516802000000004,
"max": 0.029532314820000002,
"count": 100
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.10602808743715286,
"min": 0.10435324162244797,
"max": 5.4608564376831055,
"count": 100
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.3180842697620392,
"min": 0.20994243025779724,
"max": 10.921712875366211,
"count": 100
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 364.8,
"min": 340.8255813953488,
"max": 999.0,
"count": 100
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 27360.0,
"min": 15984.0,
"max": 33116.0,
"count": 100
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.5551786492268245,
"min": -1.0000000521540642,
"max": 1.626434069465507,
"count": 100
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 116.63839869201183,
"min": -31.99600164592266,
"max": 147.78939798474312,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.5551786492268245,
"min": -1.0000000521540642,
"max": 1.626434069465507,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 116.63839869201183,
"min": -31.99600164592266,
"max": 147.78939798474312,
"count": 100
},
"Pyramids.Policy.RndReward.mean": {
"value": 3.9185459551836055,
"min": 3.687487304524478,
"max": 917.0562484264374,
"count": 100
},
"Pyramids.Policy.RndReward.sum": {
"value": 293.8909466387704,
"min": 292.1203705649823,
"max": 14672.899974822998,
"count": 100
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1756817588",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/home/hungchan/miniconda3/envs/rl_31012/bin/mlagents-learn ./ml-agents/config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training Run 2 --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.8.0+cu128",
"numpy_version": "1.23.5",
"end_time_seconds": "1756821270"
},
"total": 3682.686553206,
"count": 1,
"self": 0.3711183870000241,
"children": {
"run_training.setup": {
"total": 0.015643329999875277,
"count": 1,
"self": 0.015643329999875277
},
"TrainerController.start_learning": {
"total": 3682.299791489,
"count": 1,
"self": 4.471573963244737,
"children": {
"TrainerController._reset_env": {
"total": 1.4596728899996378,
"count": 1,
"self": 1.4596728899996378
},
"TrainerController.advance": {
"total": 3676.2634758217555,
"count": 191819,
"self": 4.349057315184837,
"children": {
"env_step": {
"total": 2625.6239074529876,
"count": 191819,
"self": 2112.8629158190943,
"children": {
"SubprocessEnvManager._take_step": {
"total": 509.92004004172304,
"count": 191819,
"self": 12.48315074475795,
"children": {
"TorchPolicy.evaluate": {
"total": 497.4368892969651,
"count": 187562,
"self": 497.4368892969651
}
}
},
"workers": {
"total": 2.8409515921703132,
"count": 191819,
"self": 0.0,
"children": {
"worker_root": {
"total": 3676.8532693950747,
"count": 191819,
"is_parallel": true,
"self": 1823.0812916923696,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0010134390013263328,
"count": 1,
"is_parallel": true,
"self": 0.00029463299870258197,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0007188060026237508,
"count": 8,
"is_parallel": true,
"self": 0.0007188060026237508
}
}
},
"UnityEnvironment.step": {
"total": 0.022171808001075988,
"count": 1,
"is_parallel": true,
"self": 0.0002565689992479747,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00017987100000027567,
"count": 1,
"is_parallel": true,
"self": 0.00017987100000027567
},
"communicator.exchange": {
"total": 0.02116701700106205,
"count": 1,
"is_parallel": true,
"self": 0.02116701700106205
},
"steps_from_proto": {
"total": 0.0005683510007656878,
"count": 1,
"is_parallel": true,
"self": 0.00014758399993297644,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00042076700083271135,
"count": 8,
"is_parallel": true,
"self": 0.00042076700083271135
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1853.771977702705,
"count": 191818,
"is_parallel": true,
"self": 49.113975984328135,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 36.795314528650124,
"count": 191818,
"is_parallel": true,
"self": 36.795314528650124
},
"communicator.exchange": {
"total": 1633.0751300659904,
"count": 191818,
"is_parallel": true,
"self": 1633.0751300659904
},
"steps_from_proto": {
"total": 134.7875571237364,
"count": 191818,
"is_parallel": true,
"self": 30.88800354094201,
"children": {
"_process_rank_one_or_two_observation": {
"total": 103.8995535827944,
"count": 1534544,
"is_parallel": true,
"self": 103.8995535827944
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1046.290511053583,
"count": 191819,
"self": 8.3830967354952,
"children": {
"process_trajectory": {
"total": 330.46261656308525,
"count": 191819,
"self": 329.9796428390855,
"children": {
"RLTrainer._checkpoint": {
"total": 0.48297372399974847,
"count": 6,
"self": 0.48297372399974847
}
}
},
"_update_policy": {
"total": 707.4447977550026,
"count": 289,
"self": 501.9307063850556,
"children": {
"TorchPPOOptimizer.update": {
"total": 205.51409136994698,
"count": 17421,
"self": 205.51409136994698
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.53000528877601e-07,
"count": 1,
"self": 8.53000528877601e-07
},
"TrainerController._save_models": {
"total": 0.1050679609998042,
"count": 1,
"self": 0.001174913000795641,
"children": {
"RLTrainer._checkpoint": {
"total": 0.10389304799900856,
"count": 1,
"self": 0.10389304799900856
}
}
}
}
}
}
}