{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 2.5989835262298584,
"min": 2.5989835262298584,
"max": 2.890258550643921,
"count": 10
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 26587.6015625,
"min": 25363.400390625,
"max": 29726.30859375,
"count": 10
},
"SnowballTarget.Step.mean": {
"value": 99960.0,
"min": 9952.0,
"max": 99960.0,
"count": 10
},
"SnowballTarget.Step.sum": {
"value": 99960.0,
"min": 9952.0,
"max": 99960.0,
"count": 10
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 3.12564754486084,
"min": 0.09583835303783417,
"max": 3.12564754486084,
"count": 10
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 637.632080078125,
"min": 18.592639923095703,
"max": 637.632080078125,
"count": 10
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 10
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 8756.0,
"min": 8756.0,
"max": 10945.0,
"count": 10
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 13.090909090909092,
"min": 2.8636363636363638,
"max": 13.090909090909092,
"count": 10
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 720.0,
"min": 126.0,
"max": 720.0,
"count": 10
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 13.090909090909092,
"min": 2.8636363636363638,
"max": 13.090909090909092,
"count": 10
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 720.0,
"min": 126.0,
"max": 720.0,
"count": 10
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 10
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 10
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.050420518025930505,
"min": 0.042750171776595944,
"max": 0.05169212756415315,
"count": 9
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.050420518025930505,
"min": 0.042750171776595944,
"max": 0.05169212756415315,
"count": 9
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.2829171262681484,
"min": 0.08711123023004759,
"max": 0.2829171262681484,
"count": 9
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.2829171262681484,
"min": 0.08711123023004759,
"max": 0.2829171262681484,
"count": 9
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 1.780809406399999e-05,
"min": 1.780809406399999e-05,
"max": 0.0002672640109119999,
"count": 9
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 1.780809406399999e-05,
"min": 1.780809406399999e-05,
"max": 0.0002672640109119999,
"count": 9
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10593600000000002,
"min": 0.10593600000000002,
"max": 0.18908800000000003,
"count": 9
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.10593600000000002,
"min": 0.10593600000000002,
"max": 0.18908800000000003,
"count": 9
},
"SnowballTarget.Policy.Beta.mean": {
"value": 0.0003062063999999999,
"min": 0.0003062063999999999,
"max": 0.004455491200000001,
"count": 9
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.0003062063999999999,
"min": 0.0003062063999999999,
"max": 0.004455491200000001,
"count": 9
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1754240485",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/miniconda/envs/myenv/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --force",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.7.1+cu126",
"numpy_version": "1.23.5",
"end_time_seconds": "1754240843"
},
"total": 358.7673213730001,
"count": 1,
"self": 0.5388858770000979,
"children": {
"run_training.setup": {
"total": 0.023231278000025668,
"count": 1,
"self": 0.023231278000025668
},
"TrainerController.start_learning": {
"total": 358.20520421799995,
"count": 1,
"self": 0.7014976329943465,
"children": {
"TrainerController._reset_env": {
"total": 2.3926439930000925,
"count": 1,
"self": 2.3926439930000925
},
"TrainerController.advance": {
"total": 354.9234442670056,
"count": 9142,
"self": 0.22896362200765452,
"children": {
"env_step": {
"total": 354.69448064499795,
"count": 9142,
"self": 272.7905712980055,
"children": {
"SubprocessEnvManager._take_step": {
"total": 81.69541058700099,
"count": 9142,
"self": 1.3159509630017965,
"children": {
"TorchPolicy.evaluate": {
"total": 80.3794596239992,
"count": 9142,
"self": 80.3794596239992
}
}
},
"workers": {
"total": 0.20849875999147116,
"count": 9142,
"self": 0.0,
"children": {
"worker_root": {
"total": 356.7475550349858,
"count": 9142,
"is_parallel": true,
"self": 198.96295247298917,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.002402198000027056,
"count": 1,
"is_parallel": true,
"self": 0.0008803609999858963,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0015218370000411596,
"count": 10,
"is_parallel": true,
"self": 0.0015218370000411596
}
}
},
"UnityEnvironment.step": {
"total": 0.04974442899992937,
"count": 1,
"is_parallel": true,
"self": 0.0007538439997460955,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004302010000856171,
"count": 1,
"is_parallel": true,
"self": 0.0004302010000856171
},
"communicator.exchange": {
"total": 0.045982275000028494,
"count": 1,
"is_parallel": true,
"self": 0.045982275000028494
},
"steps_from_proto": {
"total": 0.002578109000069162,
"count": 1,
"is_parallel": true,
"self": 0.0005186529998582046,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0020594560002109574,
"count": 10,
"is_parallel": true,
"self": 0.0020594560002109574
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 157.78460256199662,
"count": 9141,
"is_parallel": true,
"self": 7.004816163011242,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 3.764690716995233,
"count": 9141,
"is_parallel": true,
"self": 3.764690716995233
},
"communicator.exchange": {
"total": 124.96474915599333,
"count": 9141,
"is_parallel": true,
"self": 124.96474915599333
},
"steps_from_proto": {
"total": 22.050346525996815,
"count": 9141,
"is_parallel": true,
"self": 4.628739025985396,
"children": {
"_process_rank_one_or_two_observation": {
"total": 17.42160750001142,
"count": 91410,
"is_parallel": true,
"self": 17.42160750001142
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 0.00033579699993424583,
"count": 1,
"self": 0.00033579699993424583,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 350.60764984000775,
"count": 519341,
"is_parallel": true,
"self": 11.664670684981502,
"children": {
"process_trajectory": {
"total": 227.69188359802638,
"count": 519341,
"is_parallel": true,
"self": 227.0043836920263,
"children": {
"RLTrainer._checkpoint": {
"total": 0.6874999060000846,
"count": 2,
"is_parallel": true,
"self": 0.6874999060000846
}
}
},
"_update_policy": {
"total": 111.25109555699987,
"count": 9,
"is_parallel": true,
"self": 24.265683099992998,
"children": {
"TorchPPOOptimizer.update": {
"total": 86.98541245700687,
"count": 1089,
"is_parallel": true,
"self": 86.98541245700687
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.1872825279999688,
"count": 1,
"self": 0.0038540959999409097,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1834284320000279,
"count": 1,
"self": 0.1834284320000279
}
}
}
}
}
}
}