hudaifah's picture
First Training
0a266f7 verified
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.41223639249801636,
"min": 0.4115261137485504,
"max": 1.5156562328338623,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 12386.87890625,
"min": 12247.017578125,
"max": 45978.94921875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989888.0,
"min": 29952.0,
"max": 989888.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989888.0,
"min": 29952.0,
"max": 989888.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.6429005861282349,
"min": -0.13956299424171448,
"max": 0.6429005861282349,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 183.22666931152344,
"min": -33.07643127441406,
"max": 183.22666931152344,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": -0.010501052252948284,
"min": -0.029065605252981186,
"max": 0.3260534107685089,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": -2.992799997329712,
"min": -7.266401290893555,
"max": 77.274658203125,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06699213291479585,
"min": 0.06584831623366057,
"max": 0.07383840540742745,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9378898608071419,
"min": 0.5025261492353339,
"max": 1.0497575242382784,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013549456424481746,
"min": 6.910190867464345e-05,
"max": 0.014648863216107899,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.18969238994274445,
"min": 0.0009674267214450084,
"max": 0.21184575209917966,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.0054868914878572e-05,
"min": 1.0054868914878572e-05,
"max": 0.0003935341730450286,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.0001407681648083,
"min": 0.0001407681648083,
"max": 0.004810910497272401,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10251369285714287,
"min": 0.10251369285714287,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4351917000000003,
"min": 1.3886848,
"max": 2.5693371999999997,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002611179164285714,
"min": 0.0002611179164285714,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0036556508299999996,
"min": 0.0036556508299999996,
"max": 0.12028248724,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.00562311289831996,
"min": 0.00562311289831996,
"max": 0.22270117700099945,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.07872357964515686,
"min": 0.07872357964515686,
"max": 1.558908224105835,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 338.38947368421054,
"min": 338.38947368421054,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 32147.0,
"min": 15984.0,
"max": 33138.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.6405515597054834,
"min": -1.0000000521540642,
"max": 1.6405515597054834,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 155.8523981720209,
"min": -30.993801593780518,
"max": 155.8523981720209,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.6405515597054834,
"min": -1.0000000521540642,
"max": 1.6405515597054834,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 155.8523981720209,
"min": -30.993801593780518,
"max": 155.8523981720209,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.020046429991711384,
"min": 0.020046429991711384,
"max": 5.851396445883438,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 1.9044108492125815,
"min": 1.8089309328352101,
"max": 93.62234313413501,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1742818128",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.6.0+cu124",
"numpy_version": "1.23.5",
"end_time_seconds": "1742820286"
},
"total": 2157.9988638679997,
"count": 1,
"self": 0.6316298950005148,
"children": {
"run_training.setup": {
"total": 0.03002231899972685,
"count": 1,
"self": 0.03002231899972685
},
"TrainerController.start_learning": {
"total": 2157.3372116539995,
"count": 1,
"self": 1.1891937080290518,
"children": {
"TrainerController._reset_env": {
"total": 2.3822817420004867,
"count": 1,
"self": 2.3822817420004867
},
"TrainerController.advance": {
"total": 2153.6462793249702,
"count": 63907,
"self": 1.2488220429568173,
"children": {
"env_step": {
"total": 1486.761276942084,
"count": 63907,
"self": 1341.4976056459363,
"children": {
"SubprocessEnvManager._take_step": {
"total": 144.56020866412018,
"count": 63907,
"self": 4.495075727220865,
"children": {
"TorchPolicy.evaluate": {
"total": 140.06513293689932,
"count": 62553,
"self": 140.06513293689932
}
}
},
"workers": {
"total": 0.7034626320273674,
"count": 63907,
"self": 0.0,
"children": {
"worker_root": {
"total": 2152.4542569179757,
"count": 63907,
"is_parallel": true,
"self": 916.4597327708498,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0020301170006860048,
"count": 1,
"is_parallel": true,
"self": 0.0006597630017495248,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00137035399893648,
"count": 8,
"is_parallel": true,
"self": 0.00137035399893648
}
}
},
"UnityEnvironment.step": {
"total": 0.05413581500033615,
"count": 1,
"is_parallel": true,
"self": 0.0005221930005063768,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00043089999962830916,
"count": 1,
"is_parallel": true,
"self": 0.00043089999962830916
},
"communicator.exchange": {
"total": 0.05153920500015374,
"count": 1,
"is_parallel": true,
"self": 0.05153920500015374
},
"steps_from_proto": {
"total": 0.0016435170000477228,
"count": 1,
"is_parallel": true,
"self": 0.00036588099919754313,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012776360008501797,
"count": 8,
"is_parallel": true,
"self": 0.0012776360008501797
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1235.994524147126,
"count": 63906,
"is_parallel": true,
"self": 30.567976831032865,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.864224016029766,
"count": 63906,
"is_parallel": true,
"self": 22.864224016029766
},
"communicator.exchange": {
"total": 1090.8281828610498,
"count": 63906,
"is_parallel": true,
"self": 1090.8281828610498
},
"steps_from_proto": {
"total": 91.73414043901357,
"count": 63906,
"is_parallel": true,
"self": 17.896462029219947,
"children": {
"_process_rank_one_or_two_observation": {
"total": 73.83767840979363,
"count": 511248,
"is_parallel": true,
"self": 73.83767840979363
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 665.6361803399295,
"count": 63907,
"self": 2.37789696388063,
"children": {
"process_trajectory": {
"total": 124.39505907404146,
"count": 63907,
"self": 124.15906461604118,
"children": {
"RLTrainer._checkpoint": {
"total": 0.23599445800027752,
"count": 2,
"self": 0.23599445800027752
}
}
},
"_update_policy": {
"total": 538.8632243020074,
"count": 453,
"self": 295.24675928606393,
"children": {
"TorchPPOOptimizer.update": {
"total": 243.6164650159435,
"count": 22737,
"self": 243.6164650159435
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.249999513674993e-06,
"count": 1,
"self": 1.249999513674993e-06
},
"TrainerController._save_models": {
"total": 0.11945562900018558,
"count": 1,
"self": 0.0020706980003524222,
"children": {
"RLTrainer._checkpoint": {
"total": 0.11738493099983316,
"count": 1,
"self": 0.11738493099983316
}
}
}
}
}
}
}