drl-robo's picture
First Push
f6ba229 verified
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.7023671269416809,
"min": 0.6934306025505066,
"max": 1.4070243835449219,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 20902.4453125,
"min": 20814.013671875,
"max": 42683.4921875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989964.0,
"min": 29936.0,
"max": 989964.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989964.0,
"min": 29936.0,
"max": 989964.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.06398965418338776,
"min": -0.0958520695567131,
"max": 0.10056355595588684,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 15.677465438842773,
"min": -23.004497528076172,
"max": 23.934125900268555,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.009777371771633625,
"min": 0.008069727569818497,
"max": 0.46995115280151367,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 2.395456075668335,
"min": 1.9851529598236084,
"max": 111.84837341308594,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.0725670515463253,
"min": 0.06354882406638629,
"max": 0.0725670515463253,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0159387216485543,
"min": 0.5712860204135803,
"max": 1.0371465287079997,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.007225885001567874,
"min": 0.00035187182043963395,
"max": 0.010827925445492444,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.10116239002195024,
"min": 0.004222461845275608,
"max": 0.11196652697635369,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.356954690571428e-06,
"min": 7.356954690571428e-06,
"max": 0.00029484975171675,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010299736566799999,
"min": 0.00010299736566799999,
"max": 0.0033758236747255,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10245228571428573,
"min": 0.10245228571428573,
"max": 0.19828325,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4343320000000002,
"min": 1.4343320000000002,
"max": 2.4252745,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002549833428571428,
"min": 0.0002549833428571428,
"max": 0.009828496675,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0035697667999999997,
"min": 0.0035697667999999997,
"max": 0.11254492255000001,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.00951018650084734,
"min": 0.009361805394291878,
"max": 0.5220038890838623,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.1331426054239273,
"min": 0.1310652792453766,
"max": 4.176031112670898,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 795.918918918919,
"min": 768.375,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29449.0,
"min": 16990.0,
"max": 32334.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 0.20945551163620418,
"min": -0.9999419878567418,
"max": 0.3654409845670064,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 7.540398418903351,
"min": -30.998201623558998,
"max": 14.25219839811325,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 0.20945551163620418,
"min": -0.9999419878567418,
"max": 0.3654409845670064,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 7.540398418903351,
"min": -30.998201623558998,
"max": 14.25219839811325,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.07611777111176504,
"min": 0.07611777111176504,
"max": 10.348312054243353,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.7402397600235417,
"min": 2.7402397600235417,
"max": 186.26961697638035,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1750352984",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.7.1+cu126",
"numpy_version": "1.23.5",
"end_time_seconds": "1750354953"
},
"total": 1969.2311567800002,
"count": 1,
"self": 0.481098374000112,
"children": {
"run_training.setup": {
"total": 0.020751517999997304,
"count": 1,
"self": 0.020751517999997304
},
"TrainerController.start_learning": {
"total": 1968.729306888,
"count": 1,
"self": 1.3111864019647328,
"children": {
"TrainerController._reset_env": {
"total": 2.1359792779999225,
"count": 1,
"self": 2.1359792779999225
},
"TrainerController.advance": {
"total": 1965.2023829790353,
"count": 63198,
"self": 1.3216013430751445,
"children": {
"env_step": {
"total": 1324.4829256769328,
"count": 63198,
"self": 1180.103903649955,
"children": {
"SubprocessEnvManager._take_step": {
"total": 143.61977277995038,
"count": 63198,
"self": 4.491907906928418,
"children": {
"TorchPolicy.evaluate": {
"total": 139.12786487302196,
"count": 62541,
"self": 139.12786487302196
}
}
},
"workers": {
"total": 0.7592492470273555,
"count": 63198,
"self": 0.0,
"children": {
"worker_root": {
"total": 1964.1190016759558,
"count": 63198,
"is_parallel": true,
"self": 891.5834595538963,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0018480270000509336,
"count": 1,
"is_parallel": true,
"self": 0.0006316330002391624,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012163939998117712,
"count": 8,
"is_parallel": true,
"self": 0.0012163939998117712
}
}
},
"UnityEnvironment.step": {
"total": 0.04658258299991758,
"count": 1,
"is_parallel": true,
"self": 0.000518244999966555,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00046939699996073614,
"count": 1,
"is_parallel": true,
"self": 0.00046939699996073614
},
"communicator.exchange": {
"total": 0.04398721400002614,
"count": 1,
"is_parallel": true,
"self": 0.04398721400002614
},
"steps_from_proto": {
"total": 0.001607726999964143,
"count": 1,
"is_parallel": true,
"self": 0.0003257850003137719,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0012819419996503711,
"count": 8,
"is_parallel": true,
"self": 0.0012819419996503711
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1072.5355421220595,
"count": 63197,
"is_parallel": true,
"self": 30.903578792971757,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.284041474053538,
"count": 63197,
"is_parallel": true,
"self": 22.284041474053538
},
"communicator.exchange": {
"total": 925.3184415689864,
"count": 63197,
"is_parallel": true,
"self": 925.3184415689864
},
"steps_from_proto": {
"total": 94.02948028604783,
"count": 63197,
"is_parallel": true,
"self": 18.648399236996056,
"children": {
"_process_rank_one_or_two_observation": {
"total": 75.38108104905177,
"count": 505576,
"is_parallel": true,
"self": 75.38108104905177
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 639.3978559590273,
"count": 63198,
"self": 2.3177088510258272,
"children": {
"process_trajectory": {
"total": 121.86782405100416,
"count": 63198,
"self": 121.6555378950045,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2122861559996636,
"count": 2,
"self": 0.2122861559996636
}
}
},
"_update_policy": {
"total": 515.2123230569973,
"count": 445,
"self": 289.1893231809822,
"children": {
"TorchPPOOptimizer.update": {
"total": 226.0229998760151,
"count": 22710,
"self": 226.0229998760151
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.1629999789875e-06,
"count": 1,
"self": 1.1629999789875e-06
},
"TrainerController._save_models": {
"total": 0.07975706600018384,
"count": 1,
"self": 0.001084077000086836,
"children": {
"RLTrainer._checkpoint": {
"total": 0.078672989000097,
"count": 1,
"self": 0.078672989000097
}
}
}
}
}
}
}