jonathansculley's picture
Initial training run
7e916a6 verified
{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.9595369696617126,
"min": 0.9365046620368958,
"max": 2.849811315536499,
"count": 20
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 9119.439453125,
"min": 9119.439453125,
"max": 29090.875,
"count": 20
},
"SnowballTarget.Step.mean": {
"value": 199984.0,
"min": 9952.0,
"max": 199984.0,
"count": 20
},
"SnowballTarget.Step.sum": {
"value": 199984.0,
"min": 9952.0,
"max": 199984.0,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.091958045959473,
"min": 0.5954446792602539,
"max": 13.091958045959473,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 2552.931884765625,
"min": 115.51626586914062,
"max": 2651.05078125,
"count": 20
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.06994937533425971,
"min": 0.06454695140393382,
"max": 0.07801521769298395,
"count": 20
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.27979750133703885,
"min": 0.2581878056157353,
"max": 0.3806577473882605,
"count": 20
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.2034607178294191,
"min": 0.14633530290687785,
"max": 0.30647770762443544,
"count": 20
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.8138428713176764,
"min": 0.5853412116275114,
"max": 1.5323885381221771,
"count": 20
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 8.082097306000005e-06,
"min": 8.082097306000005e-06,
"max": 0.000291882002706,
"count": 20
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 3.232838922400002e-05,
"min": 3.232838922400002e-05,
"max": 0.00138516003828,
"count": 20
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10269400000000001,
"min": 0.10269400000000001,
"max": 0.19729400000000002,
"count": 20
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.41077600000000003,
"min": 0.41077600000000003,
"max": 0.96172,
"count": 20
},
"SnowballTarget.Policy.Beta.mean": {
"value": 0.0001444306000000001,
"min": 0.0001444306000000001,
"max": 0.0048649706,
"count": 20
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.0005777224000000004,
"min": 0.0005777224000000004,
"max": 0.023089828,
"count": 20
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 20
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 8756.0,
"min": 8756.0,
"max": 10945.0,
"count": 20
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 25.704545454545453,
"min": 3.977272727272727,
"max": 25.704545454545453,
"count": 20
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1131.0,
"min": 175.0,
"max": 1413.0,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 25.704545454545453,
"min": 3.977272727272727,
"max": 25.704545454545453,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1131.0,
"min": 175.0,
"max": 1413.0,
"count": 20
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 20
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 20
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1741520369",
"python_version": "3.10.12 (main, Feb 4 2025, 14:57:36) [GCC 11.4.0]",
"command_line_arguments": "/home/jonathan/projects/hf-deep-rl/.venv/bin/mlagents-learn ./SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "1.1.0",
"mlagents_envs_version": "1.1.0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.6.0+cu124",
"numpy_version": "1.23.5",
"end_time_seconds": "1741520690"
},
"total": 321.279661551,
"count": 1,
"self": 0.21770687199989425,
"children": {
"run_training.setup": {
"total": 0.013230865000082304,
"count": 1,
"self": 0.013230865000082304
},
"TrainerController.start_learning": {
"total": 321.048723814,
"count": 1,
"self": 0.2430655300044009,
"children": {
"TrainerController._reset_env": {
"total": 1.746840921999933,
"count": 1,
"self": 1.746840921999933
},
"TrainerController.advance": {
"total": 319.00256912299574,
"count": 18192,
"self": 0.21756725699822255,
"children": {
"env_step": {
"total": 228.88326697300442,
"count": 18192,
"self": 156.16429960900803,
"children": {
"SubprocessEnvManager._take_step": {
"total": 72.56693605799683,
"count": 18192,
"self": 0.7962439469933997,
"children": {
"TorchPolicy.evaluate": {
"total": 71.77069211100343,
"count": 18192,
"self": 71.77069211100343
}
}
},
"workers": {
"total": 0.1520313059995715,
"count": 18192,
"self": 0.0,
"children": {
"worker_root": {
"total": 320.3358378189986,
"count": 18192,
"is_parallel": true,
"self": 178.6593286350069,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0016249419999212478,
"count": 1,
"is_parallel": true,
"self": 0.0007073359998912565,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0009176060000299913,
"count": 10,
"is_parallel": true,
"self": 0.0009176060000299913
}
}
},
"UnityEnvironment.step": {
"total": 0.01570811799990679,
"count": 1,
"is_parallel": true,
"self": 0.00015677299984417914,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00012581700002556317,
"count": 1,
"is_parallel": true,
"self": 0.00012581700002556317
},
"communicator.exchange": {
"total": 0.01490038999997978,
"count": 1,
"is_parallel": true,
"self": 0.01490038999997978
},
"steps_from_proto": {
"total": 0.0005251380000572681,
"count": 1,
"is_parallel": true,
"self": 0.00011398699996334472,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00041115100009392336,
"count": 10,
"is_parallel": true,
"self": 0.00041115100009392336
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 141.6765091839917,
"count": 18191,
"is_parallel": true,
"self": 2.9389007209972533,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 1.688320935003162,
"count": 18191,
"is_parallel": true,
"self": 1.688320935003162
},
"communicator.exchange": {
"total": 128.75629966399913,
"count": 18191,
"is_parallel": true,
"self": 128.75629966399913
},
"steps_from_proto": {
"total": 8.292987863992153,
"count": 18191,
"is_parallel": true,
"self": 1.7572974540063342,
"children": {
"_process_rank_one_or_two_observation": {
"total": 6.535690409985818,
"count": 181910,
"is_parallel": true,
"self": 6.535690409985818
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 89.90173489299309,
"count": 18192,
"self": 0.2946262619859681,
"children": {
"process_trajectory": {
"total": 19.48712901000738,
"count": 18192,
"self": 19.206301670007292,
"children": {
"RLTrainer._checkpoint": {
"total": 0.28082734000008713,
"count": 4,
"self": 0.28082734000008713
}
}
},
"_update_policy": {
"total": 70.11997962099974,
"count": 90,
"self": 15.347345347997589,
"children": {
"TorchPPOOptimizer.update": {
"total": 54.772634273002154,
"count": 4587,
"self": 54.772634273002154
}
}
}
}
}
}
},
"trainer_threads": {
"total": 6.650000159424962e-07,
"count": 1,
"self": 6.650000159424962e-07
},
"TrainerController._save_models": {
"total": 0.05624757399993996,
"count": 1,
"self": 0.0004357860000254732,
"children": {
"RLTrainer._checkpoint": {
"total": 0.055811787999914486,
"count": 1,
"self": 0.055811787999914486
}
}
}
}
}
}
}