{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.7178486585617065,
"min": 0.6976361274719238,
"max": 2.878765821456909,
"count": 200
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 7856.853515625,
"min": 4877.86669921875,
"max": 32109.75390625,
"count": 200
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 200
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 10945.0,
"min": 6567.0,
"max": 13134.0,
"count": 200
},
"SnowballTarget.Step.mean": {
"value": 1999800.0,
"min": 9800.0,
"max": 1999800.0,
"count": 200
},
"SnowballTarget.Step.sum": {
"value": 1999800.0,
"min": 9800.0,
"max": 1999800.0,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.986989974975586,
"min": 0.19897232949733734,
"max": 14.026093482971191,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 699.3494873046875,
"min": 9.74964427947998,
"max": 701.3046875,
"count": 200
},
"SnowballTarget.Policy.CuriosityValueEstimate.mean": {
"value": 3.012399673461914,
"min": 1.0613294839859009,
"max": 5.97833251953125,
"count": 200
},
"SnowballTarget.Policy.CuriosityValueEstimate.sum": {
"value": 150.61997985839844,
"min": 52.00514602661133,
"max": 298.9166259765625,
"count": 200
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 27.6,
"min": 3.0408163265306123,
"max": 27.76,
"count": 200
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1380.0,
"min": 149.0,
"max": 1388.0,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 27.6,
"min": 3.0408163265306123,
"max": 27.76,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1380.0,
"min": 149.0,
"max": 1388.0,
"count": 200
},
"SnowballTarget.Policy.CuriosityReward.mean": {
"value": 6.236970176696778,
"min": 5.940337533950806,
"max": 14.174212512969971,
"count": 200
},
"SnowballTarget.Policy.CuriosityReward.sum": {
"value": 311.84850883483887,
"min": 297.0168766975403,
"max": 708.7106256484985,
"count": 200
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.07039043672990362,
"min": 0.060568576078295855,
"max": 0.07814660576781642,
"count": 200
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.21117131018971086,
"min": 0.1290437142443543,
"max": 0.37781506676544147,
"count": 200
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.09840465098165484,
"min": 0.08509081781481738,
"max": 0.21042664999178812,
"count": 200
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.2952139529449645,
"min": 0.17018163562963476,
"max": 1.0369116450641669,
"count": 200
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 9.500996833333289e-07,
"min": 9.500996833333289e-07,
"max": 0.000299175000275,
"count": 200
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 2.8502990499999867e-06,
"min": 2.8502990499999867e-06,
"max": 0.0014884500038499998,
"count": 200
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10031666666666668,
"min": 0.10031666666666668,
"max": 0.199725,
"count": 200
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.30095000000000005,
"min": 0.23251,
"max": 0.9961500000000001,
"count": 200
},
"SnowballTarget.Policy.Beta.mean": {
"value": 2.580166666666659e-05,
"min": 2.580166666666659e-05,
"max": 0.0049862775,
"count": 200
},
"SnowballTarget.Policy.Beta.sum": {
"value": 7.740499999999976e-05,
"min": 7.740499999999976e-05,
"max": 0.024807885,
"count": 200
},
"SnowballTarget.Losses.CuriosityForwardLoss.mean": {
"value": 0.0300291647356818,
"min": 0.02994706286665271,
"max": 0.11994106799144957,
"count": 200
},
"SnowballTarget.Losses.CuriosityForwardLoss.sum": {
"value": 0.0900874942070454,
"min": 0.06337820248205481,
"max": 0.4797642719657983,
"count": 200
},
"SnowballTarget.Losses.CuriosityInverseLoss.mean": {
"value": 0.5733519672973609,
"min": 0.5513428182773341,
"max": 2.7830430374426,
"count": 200
},
"SnowballTarget.Losses.CuriosityInverseLoss.sum": {
"value": 1.7200559018920827,
"min": 1.1697937635240736,
"max": 13.01859488206751,
"count": 200
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1707215110",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget-ppo --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1707219958"
},
"total": 4847.346488092001,
"count": 1,
"self": 0.4812986440010718,
"children": {
"run_training.setup": {
"total": 0.07000914399941394,
"count": 1,
"self": 0.07000914399941394
},
"TrainerController.start_learning": {
"total": 4846.7951803040005,
"count": 1,
"self": 6.687735729853557,
"children": {
"TrainerController._reset_env": {
"total": 2.601962673000344,
"count": 1,
"self": 2.601962673000344
},
"TrainerController.advance": {
"total": 4837.395461339146,
"count": 182018,
"self": 3.22620029013342,
"children": {
"env_step": {
"total": 4834.169261049013,
"count": 182018,
"self": 2938.48247238571,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1892.3388483431345,
"count": 182018,
"self": 16.908851210350804,
"children": {
"TorchPolicy.evaluate": {
"total": 1875.4299971327837,
"count": 182018,
"self": 1875.4299971327837
}
}
},
"workers": {
"total": 3.3479403201681635,
"count": 182018,
"self": 0.0,
"children": {
"worker_root": {
"total": 4832.826190093931,
"count": 182018,
"is_parallel": true,
"self": 2419.7278922478063,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0029851570006940165,
"count": 1,
"is_parallel": true,
"self": 0.0008539790023860405,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002131177998307976,
"count": 10,
"is_parallel": true,
"self": 0.002131177998307976
}
}
},
"UnityEnvironment.step": {
"total": 0.038710275999619626,
"count": 1,
"is_parallel": true,
"self": 0.0006204909996085917,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004150790000494453,
"count": 1,
"is_parallel": true,
"self": 0.0004150790000494453
},
"communicator.exchange": {
"total": 0.03580314400005591,
"count": 1,
"is_parallel": true,
"self": 0.03580314400005591
},
"steps_from_proto": {
"total": 0.0018715619999056798,
"count": 1,
"is_parallel": true,
"self": 0.0003699810004036408,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001501580999502039,
"count": 10,
"is_parallel": true,
"self": 0.001501580999502039
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2413.0982978461243,
"count": 182017,
"is_parallel": true,
"self": 109.81895432847523,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 54.1800924606805,
"count": 182017,
"is_parallel": true,
"self": 54.1800924606805
},
"communicator.exchange": {
"total": 1900.838314420108,
"count": 182017,
"is_parallel": true,
"self": 1900.838314420108
},
"steps_from_proto": {
"total": 348.2609366368606,
"count": 182017,
"is_parallel": true,
"self": 67.71488828660858,
"children": {
"_process_rank_one_or_two_observation": {
"total": 280.546048350252,
"count": 1820170,
"is_parallel": true,
"self": 280.546048350252
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 0.001056101000358467,
"count": 1,
"self": 0.001056101000358467,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 4843.667605999015,
"count": 44043,
"is_parallel": true,
"self": 3.7794931780053957,
"children": {
"process_trajectory": {
"total": 614.1665439580274,
"count": 44043,
"is_parallel": true,
"self": 606.9923408430286,
"children": {
"RLTrainer._checkpoint": {
"total": 7.174203114998818,
"count": 40,
"is_parallel": true,
"self": 7.174203114998818
}
}
},
"_update_policy": {
"total": 4225.721568862982,
"count": 767,
"is_parallel": true,
"self": 2335.95828481279,
"children": {
"TorchPPOOptimizer.update": {
"total": 1889.763284050192,
"count": 46299,
"is_parallel": true,
"self": 1889.763284050192
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.10896446100014145,
"count": 1,
"self": 0.0013898779998271493,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1075745830003143,
"count": 1,
"self": 0.1075745830003143
}
}
}
}
}
}
}