{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.6769737601280212,
"min": 0.6477652788162231,
"max": 2.8848772048950195,
"count": 100
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 6523.3193359375,
"min": 6220.490234375,
"max": 29575.76171875,
"count": 100
},
"SnowballTarget.Step.mean": {
"value": 999952.0,
"min": 9952.0,
"max": 999952.0,
"count": 100
},
"SnowballTarget.Step.sum": {
"value": 999952.0,
"min": 9952.0,
"max": 999952.0,
"count": 100
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.476271629333496,
"min": 0.2929825186729431,
"max": 13.846470832824707,
"count": 100
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 2627.873046875,
"min": 56.83860778808594,
"max": 2817.32958984375,
"count": 100
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 100
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 8756.0,
"min": 8756.0,
"max": 10945.0,
"count": 100
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.027597169479122385,
"min": 0.02255445428535268,
"max": 0.040866594271695554,
"count": 100
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.11038867791648954,
"min": 0.09557162324199453,
"max": 0.20007359457667914,
"count": 100
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.16476727463304996,
"min": 0.11405681275452176,
"max": 0.36527897914250695,
"count": 100
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.6590690985321999,
"min": 0.45622725101808703,
"max": 1.7647033855319023,
"count": 100
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 1.3764995412000023e-06,
"min": 1.3764995412000023e-06,
"max": 0.00029837640054119997,
"count": 100
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 5.505998164800009e-06,
"min": 5.505998164800009e-06,
"max": 0.001477032007656,
"count": 100
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10045880000000001,
"min": 0.10045880000000001,
"max": 0.19945880000000002,
"count": 100
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.40183520000000006,
"min": 0.40183520000000006,
"max": 0.992344,
"count": 100
},
"SnowballTarget.Policy.Beta.mean": {
"value": 3.289412000000005e-05,
"min": 3.289412000000005e-05,
"max": 0.00497299412,
"count": 100
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.0001315764800000002,
"min": 0.0001315764800000002,
"max": 0.0246179656,
"count": 100
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 26.355555555555554,
"min": 2.9318181818181817,
"max": 27.431818181818183,
"count": 100
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1186.0,
"min": 129.0,
"max": 1483.0,
"count": 100
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 26.355555555555554,
"min": 2.9318181818181817,
"max": 27.431818181818183,
"count": 100
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1186.0,
"min": 129.0,
"max": 1483.0,
"count": 100
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1673690942",
"python_version": "3.8.8 (default, Feb 24 2021, 21:46:12) \n[GCC 7.3.0]",
"command_line_arguments": "/opt/conda/bin/mlagents-learn ./src/SnowballTarget_params.yaml --env=ml-agents/training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=2023-01-14_10-09-01 --results-dir=./src/runs/train --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1",
"numpy_version": "1.19.2",
"end_time_seconds": "1673695344"
},
"total": 4401.617215140001,
"count": 1,
"self": 0.2190979890001472,
"children": {
"run_training.setup": {
"total": 0.030691262999880564,
"count": 1,
"self": 0.030691262999880564
},
"TrainerController.start_learning": {
"total": 4401.367425888,
"count": 1,
"self": 1.5544644770998275,
"children": {
"TrainerController._reset_env": {
"total": 2.4205356739998933,
"count": 1,
"self": 2.4205356739998933
},
"TrainerController.advance": {
"total": 4397.3119387529005,
"count": 90942,
"self": 0.7482325350829342,
"children": {
"env_step": {
"total": 4396.5637062178175,
"count": 90942,
"self": 3912.197442464144,
"children": {
"SubprocessEnvManager._take_step": {
"total": 483.5600099767471,
"count": 90942,
"self": 3.5419182595856,
"children": {
"TorchPolicy.evaluate": {
"total": 480.0180917171615,
"count": 90942,
"self": 107.47260620901943,
"children": {
"TorchPolicy.sample_actions": {
"total": 372.54548550814206,
"count": 90942,
"self": 372.54548550814206
}
}
}
}
},
"workers": {
"total": 0.8062537769264964,
"count": 90942,
"self": 0.0,
"children": {
"worker_root": {
"total": 4399.180905620952,
"count": 90942,
"is_parallel": true,
"self": 573.7414039370292,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.002669847000106529,
"count": 1,
"is_parallel": true,
"self": 0.0008883280002009997,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0017815189999055292,
"count": 10,
"is_parallel": true,
"self": 0.0017815189999055292
}
}
},
"UnityEnvironment.step": {
"total": 0.0741495180000129,
"count": 1,
"is_parallel": true,
"self": 0.00012181300053271116,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0006089759999667876,
"count": 1,
"is_parallel": true,
"self": 0.0006089759999667876
},
"communicator.exchange": {
"total": 0.07145878899973468,
"count": 1,
"is_parallel": true,
"self": 0.07145878899973468
},
"steps_from_proto": {
"total": 0.001959939999778726,
"count": 1,
"is_parallel": true,
"self": 0.00018193099913332844,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0017780090006453975,
"count": 10,
"is_parallel": true,
"self": 0.0017780090006453975
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 3825.439501683923,
"count": 90941,
"is_parallel": true,
"self": 11.365759420959876,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 47.11917420311329,
"count": 90941,
"is_parallel": true,
"self": 47.11917420311329
},
"communicator.exchange": {
"total": 3562.0366735060857,
"count": 90941,
"is_parallel": true,
"self": 3562.0366735060857
},
"steps_from_proto": {
"total": 204.9178945537642,
"count": 90941,
"is_parallel": true,
"self": 17.167834095604576,
"children": {
"_process_rank_one_or_two_observation": {
"total": 187.75006045815962,
"count": 909410,
"is_parallel": true,
"self": 187.75006045815962
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.299100045405794e-05,
"count": 1,
"self": 8.299100045405794e-05,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 4393.914356482512,
"count": 838210,
"is_parallel": true,
"self": 11.423304304487829,
"children": {
"process_trajectory": {
"total": 3139.1710635020227,
"count": 838210,
"is_parallel": true,
"self": 3134.9514867410253,
"children": {
"RLTrainer._checkpoint": {
"total": 4.21957676099737,
"count": 10,
"is_parallel": true,
"self": 4.21957676099737
}
}
},
"_update_policy": {
"total": 1243.3199886760012,
"count": 454,
"is_parallel": true,
"self": 208.7387946839999,
"children": {
"TorchPPOOptimizer.update": {
"total": 1034.5811939920013,
"count": 5448,
"is_parallel": true,
"self": 1034.5811939920013
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.08040399299898127,
"count": 1,
"self": 0.0004114239982300205,
"children": {
"RLTrainer._checkpoint": {
"total": 0.07999256900075125,
"count": 1,
"self": 0.07999256900075125
}
}
}
}
}
}
}