{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.8860200047492981,
"min": 0.8582239747047424,
"max": 2.890317678451538,
"count": 200
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 662.7429809570312,
"min": 575.8682861328125,
"max": 4196.7412109375,
"count": 200
},
"SnowballTarget.Step.mean": {
"value": 199984.0,
"min": 960.0,
"max": 199984.0,
"count": 200
},
"SnowballTarget.Step.sum": {
"value": 199984.0,
"min": 960.0,
"max": 199984.0,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 12.747982025146484,
"min": -0.04493396729230881,
"max": 12.930602073669434,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 203.96771240234375,
"min": -0.7189434766769409,
"max": 335.475341796875,
"count": 200
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 90
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 2189.0,
"min": 2189.0,
"max": 2189.0,
"count": 90
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.08328796597807549,
"min": 0.05503600291958006,
"max": 0.08939312359457378,
"count": 90
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.08328796597807549,
"min": 0.05503600291958006,
"max": 0.08939312359457378,
"count": 90
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.2155710961304459,
"min": 0.099495404244711,
"max": 0.33145199044078005,
"count": 90
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.2155710961304459,
"min": 0.099495404244711,
"max": 0.33145199044078005,
"count": 90
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 3.1320989560000013e-06,
"min": 3.1320989560000013e-06,
"max": 0.000296832001056,
"count": 90
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 3.1320989560000013e-06,
"min": 3.1320989560000013e-06,
"max": 0.000296832001056,
"count": 90
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.101044,
"min": 0.101044,
"max": 0.198944,
"count": 90
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.101044,
"min": 0.101044,
"max": 0.198944,
"count": 90
},
"SnowballTarget.Policy.Beta.mean": {
"value": 6.209560000000005e-05,
"min": 6.209560000000005e-05,
"max": 0.0049473056,
"count": 90
},
"SnowballTarget.Policy.Beta.sum": {
"value": 6.209560000000005e-05,
"min": 6.209560000000005e-05,
"max": 0.0049473056,
"count": 90
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 27.0,
"min": 0.0,
"max": 28.0,
"count": 108
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 27.0,
"min": 0.0,
"max": 288.0,
"count": 108
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 27.0,
"min": 0.0,
"max": 28.0,
"count": 108
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 27.0,
"min": 0.0,
"max": 288.0,
"count": 108
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1727569075",
"python_version": "3.10.12 (main, Sep 11 2024, 15:47:36) [GCC 11.4.0]",
"command_line_arguments": "venv/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env ./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.4.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1727569531"
},
"total": 455.9225540000001,
"count": 1,
"self": 0.3770157000000154,
"children": {
"run_training.setup": {
"total": 0.018790500000022803,
"count": 1,
"self": 0.018790500000022803
},
"TrainerController.start_learning": {
"total": 455.52674780000007,
"count": 1,
"self": 0.5759321000072077,
"children": {
"TrainerController._reset_env": {
"total": 2.133236899999929,
"count": 1,
"self": 2.133236899999929
},
"TrainerController.advance": {
"total": 452.76114509999286,
"count": 18205,
"self": 0.25377149999815174,
"children": {
"env_step": {
"total": 452.5073735999947,
"count": 18205,
"self": 351.7054172999932,
"children": {
"SubprocessEnvManager._take_step": {
"total": 100.53704310000921,
"count": 18205,
"self": 1.421968600000696,
"children": {
"TorchPolicy.evaluate": {
"total": 99.11507450000852,
"count": 18205,
"self": 99.11507450000852
}
}
},
"workers": {
"total": 0.2649131999922929,
"count": 18205,
"self": 0.0,
"children": {
"worker_root": {
"total": 454.5515174000094,
"count": 18205,
"is_parallel": true,
"self": 199.80757640002457,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0028673000000480897,
"count": 1,
"is_parallel": true,
"self": 0.0013991000000714848,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014681999999766049,
"count": 10,
"is_parallel": true,
"self": 0.0014681999999766049
}
}
},
"UnityEnvironment.step": {
"total": 0.02484279999998762,
"count": 1,
"is_parallel": true,
"self": 0.0003149999998868225,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00021660000004430913,
"count": 1,
"is_parallel": true,
"self": 0.00021660000004430913
},
"communicator.exchange": {
"total": 0.02342250000003787,
"count": 1,
"is_parallel": true,
"self": 0.02342250000003787
},
"steps_from_proto": {
"total": 0.0008887000000186163,
"count": 1,
"is_parallel": true,
"self": 0.00022590000003219757,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0006627999999864187,
"count": 10,
"is_parallel": true,
"self": 0.0006627999999864187
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 254.74394099998483,
"count": 18204,
"is_parallel": true,
"self": 5.481784899984291,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 4.223203399997715,
"count": 18204,
"is_parallel": true,
"self": 4.223203399997715
},
"communicator.exchange": {
"total": 226.3048554999948,
"count": 18204,
"is_parallel": true,
"self": 226.3048554999948
},
"steps_from_proto": {
"total": 18.734097200008023,
"count": 18204,
"is_parallel": true,
"self": 4.4397260000184815,
"children": {
"_process_rank_one_or_two_observation": {
"total": 14.294371199989541,
"count": 182040,
"is_parallel": true,
"self": 14.294371199989541
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 0.00014460000011240481,
"count": 1,
"self": 0.00014460000011240481,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 450.99759899999015,
"count": 223601,
"is_parallel": true,
"self": 5.733545100014567,
"children": {
"process_trajectory": {
"total": 262.1901510999753,
"count": 223601,
"is_parallel": true,
"self": 261.58092629997543,
"children": {
"RLTrainer._checkpoint": {
"total": 0.609224799999879,
"count": 4,
"is_parallel": true,
"self": 0.609224799999879
}
}
},
"_update_policy": {
"total": 183.07390280000027,
"count": 90,
"is_parallel": true,
"self": 29.04889129999492,
"children": {
"TorchPPOOptimizer.update": {
"total": 154.02501150000535,
"count": 4584,
"is_parallel": true,
"self": 154.02501150000535
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.056289099999958125,
"count": 1,
"self": 0.0007940999998936604,
"children": {
"RLTrainer._checkpoint": {
"total": 0.055495000000064465,
"count": 1,
"self": 0.055495000000064465
}
}
}
}
}
}
}