{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.4034281969070435,
"min": 1.4034281969070435,
"max": 1.4254093170166016,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 70506.828125,
"min": 67530.0,
"max": 77140.40625,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 79.13001605136436,
"min": 75.60736196319019,
"max": 396.07936507936506,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 49298.0,
"min": 48950.0,
"max": 49910.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999978.0,
"min": 49297.0,
"max": 1999978.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999978.0,
"min": 49297.0,
"max": 1999978.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.547334909439087,
"min": 0.03736720234155655,
"max": 2.547334909439087,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1586.9896240234375,
"min": 4.670900344848633,
"max": 1615.748779296875,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 4.024332938569315,
"min": 1.6354788365364075,
"max": 4.024332938569315,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 2507.1594207286835,
"min": 204.43485456705093,
"max": 2530.5656306147575,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 4.024332938569315,
"min": 1.6354788365364075,
"max": 4.024332938569315,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 2507.1594207286835,
"min": 204.43485456705093,
"max": 2530.5656306147575,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.01572424361736719,
"min": 0.013764370328378087,
"max": 0.02050111532056083,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.04717273085210157,
"min": 0.027528740656756173,
"max": 0.05341944813311178,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.06232480626139376,
"min": 0.02198513010516763,
"max": 0.06592570630212624,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.18697441878418128,
"min": 0.04397026021033526,
"max": 0.19777711890637872,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.6188487937499925e-06,
"min": 3.6188487937499925e-06,
"max": 0.0002953074765641749,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 1.0856546381249978e-05,
"min": 1.0856546381249978e-05,
"max": 0.0008438973187008998,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10120625000000001,
"min": 0.10120625000000001,
"max": 0.19843582500000007,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30361875000000005,
"min": 0.20754604999999998,
"max": 0.5812991000000002,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 7.019187499999988e-05,
"min": 7.019187499999988e-05,
"max": 0.004921947667499999,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00021057562499999965,
"min": 0.00021057562499999965,
"max": 0.014066825089999999,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1742524218",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/home/samrito/anaconda3/envs/drl_hf/bin/mlagents-learn ./ml-agents/config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics --force",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.6.0+cu124",
"numpy_version": "1.23.5",
"end_time_seconds": "1742525039"
},
"total": 820.1580611271784,
"count": 1,
"self": 0.16697772918269038,
"children": {
"run_training.setup": {
"total": 0.008863725000992417,
"count": 1,
"self": 0.008863725000992417
},
"TrainerController.start_learning": {
"total": 819.9822196729947,
"count": 1,
"self": 1.8928601217921823,
"children": {
"TrainerController._reset_env": {
"total": 1.0225368258543313,
"count": 1,
"self": 1.0225368258543313
},
"TrainerController.advance": {
"total": 817.0091294022277,
"count": 233093,
"self": 1.786020978121087,
"children": {
"env_step": {
"total": 633.5002282338683,
"count": 233093,
"self": 488.46714628860354,
"children": {
"SubprocessEnvManager._take_step": {
"total": 143.8156062825583,
"count": 233093,
"self": 6.789004281628877,
"children": {
"TorchPolicy.evaluate": {
"total": 137.02660200092942,
"count": 222956,
"self": 137.02660200092942
}
}
},
"workers": {
"total": 1.2174756627064198,
"count": 233093,
"self": 0.0,
"children": {
"worker_root": {
"total": 817.54456645553,
"count": 233093,
"is_parallel": true,
"self": 436.1020396091044,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0005594110116362572,
"count": 1,
"is_parallel": true,
"self": 0.00010283198207616806,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0004565790295600891,
"count": 2,
"is_parallel": true,
"self": 0.0004565790295600891
}
}
},
"UnityEnvironment.step": {
"total": 0.010016812942922115,
"count": 1,
"is_parallel": true,
"self": 9.863288141787052e-05,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 9.079813025891781e-05,
"count": 1,
"is_parallel": true,
"self": 9.079813025891781e-05
},
"communicator.exchange": {
"total": 0.009559697937220335,
"count": 1,
"is_parallel": true,
"self": 0.009559697937220335
},
"steps_from_proto": {
"total": 0.000267683994024992,
"count": 1,
"is_parallel": true,
"self": 6.234901957213879e-05,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0002053349744528532,
"count": 2,
"is_parallel": true,
"self": 0.0002053349744528532
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 381.4425268464256,
"count": 233092,
"is_parallel": true,
"self": 10.97539081517607,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 19.231896872166544,
"count": 233092,
"is_parallel": true,
"self": 19.231896872166544
},
"communicator.exchange": {
"total": 326.857640276663,
"count": 233092,
"is_parallel": true,
"self": 326.857640276663
},
"steps_from_proto": {
"total": 24.37759888241999,
"count": 233092,
"is_parallel": true,
"self": 8.469585163053125,
"children": {
"_process_rank_one_or_two_observation": {
"total": 15.908013719366863,
"count": 466184,
"is_parallel": true,
"self": 15.908013719366863
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 181.72288019023836,
"count": 233093,
"self": 2.791784961009398,
"children": {
"process_trajectory": {
"total": 65.78023722278886,
"count": 233093,
"self": 65.20290142251179,
"children": {
"RLTrainer._checkpoint": {
"total": 0.5773358002770692,
"count": 10,
"self": 0.5773358002770692
}
}
},
"_update_policy": {
"total": 113.1508580064401,
"count": 97,
"self": 93.06675824220292,
"children": {
"TorchPPOOptimizer.update": {
"total": 20.08409976423718,
"count": 2910,
"self": 20.08409976423718
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.920883268117905e-07,
"count": 1,
"self": 5.920883268117905e-07
},
"TrainerController._save_models": {
"total": 0.057692731032148004,
"count": 1,
"self": 0.000729620922356844,
"children": {
"RLTrainer._checkpoint": {
"total": 0.05696311010979116,
"count": 1,
"self": 0.05696311010979116
}
}
}
}
}
}
}