{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.4027626514434814,
"min": 1.4027626514434814,
"max": 1.4292452335357666,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 69474.625,
"min": 68290.25,
"max": 76629.34375,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 97.18886679920477,
"min": 82.14285714285714,
"max": 400.064,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 48886.0,
"min": 48867.0,
"max": 50139.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999989.0,
"min": 49534.0,
"max": 1999989.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999989.0,
"min": 49534.0,
"max": 1999989.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.463482618331909,
"min": 0.10775356739759445,
"max": 2.472022294998169,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1239.1317138671875,
"min": 13.361442565917969,
"max": 1453.549072265625,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 3.7962356208569963,
"min": 1.8236981704831123,
"max": 3.913876270357504,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 1909.506517291069,
"min": 226.13857313990593,
"max": 2259.570232331753,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 3.7962356208569963,
"min": 1.8236981704831123,
"max": 3.913876270357504,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 1909.506517291069,
"min": 226.13857313990593,
"max": 2259.570232331753,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.019307712080383982,
"min": 0.013247698470756101,
"max": 0.02031691347947344,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.057923136241151946,
"min": 0.026495396941512202,
"max": 0.057923136241151946,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.05089032749334971,
"min": 0.021669185192634662,
"max": 0.06362011147042115,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.15267098248004912,
"min": 0.043338370385269324,
"max": 0.1800985146313906,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.3785488738499998e-06,
"min": 3.3785488738499998e-06,
"max": 0.0002953413015529,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 1.013564662155e-05,
"min": 1.013564662155e-05,
"max": 0.0008440375686541499,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10112614999999998,
"min": 0.10112614999999998,
"max": 0.19844709999999993,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30337844999999997,
"min": 0.20742484999999994,
"max": 0.5813458499999999,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 6.619488500000001e-05,
"min": 6.619488500000001e-05,
"max": 0.00492251029,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00019858465500000003,
"min": 0.00019858465500000003,
"max": 0.014069157914999998,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1706752519",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1706755022"
},
"total": 2503.6983222589997,
"count": 1,
"self": 0.43430491700019047,
"children": {
"run_training.setup": {
"total": 0.0757888709999861,
"count": 1,
"self": 0.0757888709999861
},
"TrainerController.start_learning": {
"total": 2503.1882284709995,
"count": 1,
"self": 4.855238051993638,
"children": {
"TrainerController._reset_env": {
"total": 4.193587248000085,
"count": 1,
"self": 4.193587248000085
},
"TrainerController.advance": {
"total": 2494.027991920005,
"count": 232095,
"self": 4.9839214308181,
"children": {
"env_step": {
"total": 1996.6991066640403,
"count": 232095,
"self": 1652.7253726701188,
"children": {
"SubprocessEnvManager._take_step": {
"total": 340.753420066859,
"count": 232095,
"self": 17.425684225772557,
"children": {
"TorchPolicy.evaluate": {
"total": 323.32773584108645,
"count": 222978,
"self": 323.32773584108645
}
}
},
"workers": {
"total": 3.2203139270625343,
"count": 232095,
"self": 0.0,
"children": {
"worker_root": {
"total": 2495.6089489699184,
"count": 232095,
"is_parallel": true,
"self": 1148.5556726648247,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001050991000056456,
"count": 1,
"is_parallel": true,
"self": 0.0003031190001365758,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0007478719999198802,
"count": 2,
"is_parallel": true,
"self": 0.0007478719999198802
}
}
},
"UnityEnvironment.step": {
"total": 0.03210064399991097,
"count": 1,
"is_parallel": true,
"self": 0.000319063999768332,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00022948300011194078,
"count": 1,
"is_parallel": true,
"self": 0.00022948300011194078
},
"communicator.exchange": {
"total": 0.030804017000036765,
"count": 1,
"is_parallel": true,
"self": 0.030804017000036765
},
"steps_from_proto": {
"total": 0.0007480799999939336,
"count": 1,
"is_parallel": true,
"self": 0.00018958399982693663,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.000558496000166997,
"count": 2,
"is_parallel": true,
"self": 0.000558496000166997
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1347.0532763050937,
"count": 232094,
"is_parallel": true,
"self": 42.79424322732552,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 85.4027541489802,
"count": 232094,
"is_parallel": true,
"self": 85.4027541489802
},
"communicator.exchange": {
"total": 1122.3223586119213,
"count": 232094,
"is_parallel": true,
"self": 1122.3223586119213
},
"steps_from_proto": {
"total": 96.53392031686667,
"count": 232094,
"is_parallel": true,
"self": 33.92946340492699,
"children": {
"_process_rank_one_or_two_observation": {
"total": 62.60445691193968,
"count": 464188,
"is_parallel": true,
"self": 62.60445691193968
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 492.3449638251468,
"count": 232095,
"self": 7.657434308235452,
"children": {
"process_trajectory": {
"total": 154.42602840791392,
"count": 232095,
"self": 153.21933608491327,
"children": {
"RLTrainer._checkpoint": {
"total": 1.2066923230006523,
"count": 10,
"self": 1.2066923230006523
}
}
},
"_update_policy": {
"total": 330.26150110899744,
"count": 97,
"self": 264.82815544999517,
"children": {
"TorchPPOOptimizer.update": {
"total": 65.43334565900227,
"count": 2910,
"self": 65.43334565900227
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.924000677419826e-06,
"count": 1,
"self": 4.924000677419826e-06
},
"TrainerController._save_models": {
"total": 0.1114063269997132,
"count": 1,
"self": 0.0019295629990665475,
"children": {
"RLTrainer._checkpoint": {
"total": 0.10947676400064665,
"count": 1,
"self": 0.10947676400064665
}
}
}
}
}
}
}