Yelin Z
rl course default, 500000 steps
f6d793f
{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.6958096623420715,
"min": 0.675806999206543,
"max": 2.846538782119751,
"count": 50
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 6720.1298828125,
"min": 6460.47998046875,
"max": 29401.8984375,
"count": 50
},
"SnowballTarget.Step.mean": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Step.sum": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.63944149017334,
"min": 0.5056425333023071,
"max": 13.892260551452637,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 2796.08544921875,
"min": 98.09465026855469,
"max": 2847.913330078125,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 10945.0,
"min": 8756.0,
"max": 10945.0,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.06949662416206112,
"min": 0.0614933082976552,
"max": 0.07544124020319477,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.3474831208103056,
"min": 0.2459732331906208,
"max": 0.37102275795711886,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.1811628484294987,
"min": 0.15294911551937534,
"max": 0.29641295256567934,
"count": 50
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.9058142421474935,
"min": 0.6117964620775014,
"max": 1.4820647628283967,
"count": 50
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 3.0605789798399975e-06,
"min": 3.0605789798399975e-06,
"max": 0.00029675280108239997,
"count": 50
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 1.5302894899199988e-05,
"min": 1.5302894899199988e-05,
"max": 0.001454064015312,
"count": 50
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10102016,
"min": 0.10102016,
"max": 0.19891759999999997,
"count": 50
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.5051008,
"min": 0.41199040000000003,
"max": 0.984688,
"count": 50
},
"SnowballTarget.Policy.Beta.mean": {
"value": 6.0905983999999945e-05,
"min": 6.0905983999999945e-05,
"max": 0.00494598824,
"count": 50
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.00030452991999999974,
"min": 0.00030452991999999974,
"max": 0.0242359312,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 27.145454545454545,
"min": 3.9545454545454546,
"max": 27.618181818181817,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1493.0,
"min": 174.0,
"max": 1519.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 27.145454545454545,
"min": 3.9545454545454546,
"max": 27.618181818181817,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1493.0,
"min": 174.0,
"max": 1519.0,
"count": 50
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1676980781",
"python_version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.21.6",
"end_time_seconds": "1676981955"
},
"total": 1173.4596981720001,
"count": 1,
"self": 0.4444905519999338,
"children": {
"run_training.setup": {
"total": 0.11556341599998632,
"count": 1,
"self": 0.11556341599998632
},
"TrainerController.start_learning": {
"total": 1172.8996442040002,
"count": 1,
"self": 1.416543868996314,
"children": {
"TrainerController._reset_env": {
"total": 10.074521226999991,
"count": 1,
"self": 10.074521226999991
},
"TrainerController.advance": {
"total": 1161.283889586004,
"count": 45476,
"self": 0.7463563100170632,
"children": {
"env_step": {
"total": 1160.537533275987,
"count": 45476,
"self": 792.903544411996,
"children": {
"SubprocessEnvManager._take_step": {
"total": 366.91682108899033,
"count": 45476,
"self": 3.7002283039869326,
"children": {
"TorchPolicy.evaluate": {
"total": 363.2165927850034,
"count": 45476,
"self": 81.52815215399295,
"children": {
"TorchPolicy.sample_actions": {
"total": 281.68844063101045,
"count": 45476,
"self": 281.68844063101045
}
}
}
}
},
"workers": {
"total": 0.7171677750006893,
"count": 45476,
"self": 0.0,
"children": {
"worker_root": {
"total": 1168.9129080759712,
"count": 45476,
"is_parallel": true,
"self": 557.8387165319648,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.006598668000037833,
"count": 1,
"is_parallel": true,
"self": 0.003960273999950914,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002638394000086919,
"count": 10,
"is_parallel": true,
"self": 0.002638394000086919
}
}
},
"UnityEnvironment.step": {
"total": 0.03888374500002101,
"count": 1,
"is_parallel": true,
"self": 0.0005529240000328173,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00023629800000435353,
"count": 1,
"is_parallel": true,
"self": 0.00023629800000435353
},
"communicator.exchange": {
"total": 0.03619065599997384,
"count": 1,
"is_parallel": true,
"self": 0.03619065599997384
},
"steps_from_proto": {
"total": 0.0019038670000099955,
"count": 1,
"is_parallel": true,
"self": 0.0004401950000101351,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0014636719999998604,
"count": 10,
"is_parallel": true,
"self": 0.0014636719999998604
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 611.0741915440065,
"count": 45475,
"is_parallel": true,
"self": 24.27271308801278,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 13.32726553899846,
"count": 45475,
"is_parallel": true,
"self": 13.32726553899846
},
"communicator.exchange": {
"total": 492.17080077800455,
"count": 45475,
"is_parallel": true,
"self": 492.17080077800455
},
"steps_from_proto": {
"total": 81.30341213899072,
"count": 45475,
"is_parallel": true,
"self": 17.80279230391926,
"children": {
"_process_rank_one_or_two_observation": {
"total": 63.50061983507146,
"count": 454750,
"is_parallel": true,
"self": 63.50061983507146
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 0.00010929399991255195,
"count": 1,
"self": 0.00010929399991255195,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 1152.689807115095,
"count": 1013222,
"is_parallel": true,
"self": 25.581740080166355,
"children": {
"process_trajectory": {
"total": 653.8927298809265,
"count": 1013222,
"is_parallel": true,
"self": 651.3351190219261,
"children": {
"RLTrainer._checkpoint": {
"total": 2.5576108590004196,
"count": 10,
"is_parallel": true,
"self": 2.5576108590004196
}
}
},
"_update_policy": {
"total": 473.2153371540021,
"count": 227,
"is_parallel": true,
"self": 162.72890957700446,
"children": {
"TorchPPOOptimizer.update": {
"total": 310.48642757699764,
"count": 11571,
"is_parallel": true,
"self": 310.48642757699764
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.12458022800001345,
"count": 1,
"self": 0.0010044250000191823,
"children": {
"RLTrainer._checkpoint": {
"total": 0.12357580299999427,
"count": 1,
"self": 0.12357580299999427
}
}
}
}
}
}
}