{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.4027626514434814,
"min": 1.4027626514434814,
"max": 1.4292452335357666,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 69474.625,
"min": 68290.25,
"max": 76629.34375,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 97.18886679920477,
"min": 82.14285714285714,
"max": 400.064,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 48886.0,
"min": 48867.0,
"max": 50139.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999989.0,
"min": 49534.0,
"max": 1999989.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999989.0,
"min": 49534.0,
"max": 1999989.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.463482618331909,
"min": 0.10775356739759445,
"max": 2.472022294998169,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1239.1317138671875,
"min": 13.361442565917969,
"max": 1453.549072265625,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 3.7962356208569963,
"min": 1.8236981704831123,
"max": 3.913876270357504,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 1909.506517291069,
"min": 226.13857313990593,
"max": 2259.570232331753,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 3.7962356208569963,
"min": 1.8236981704831123,
"max": 3.913876270357504,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 1909.506517291069,
"min": 226.13857313990593,
"max": 2259.570232331753,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.019307712080383982,
"min": 0.013247698470756101,
"max": 0.02031691347947344,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.057923136241151946,
"min": 0.026495396941512202,
"max": 0.057923136241151946,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.05089032749334971,
"min": 0.021669185192634662,
"max": 0.06362011147042115,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.15267098248004912,
"min": 0.043338370385269324,
"max": 0.1800985146313906,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.3785488738499998e-06,
"min": 3.3785488738499998e-06,
"max": 0.0002953413015529,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 1.013564662155e-05,
"min": 1.013564662155e-05,
"max": 0.0008440375686541499,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10112614999999998,
"min": 0.10112614999999998,
"max": 0.19844709999999993,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30337844999999997,
"min": 0.20742484999999994,
"max": 0.5813458499999999,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 6.619488500000001e-05,
"min": 6.619488500000001e-05,
"max": 0.00492251029,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00019858465500000003,
"min": 0.00019858465500000003,
"max": 0.014069157914999998,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1706752519",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1706755022"
},
"total": 2503.6983222589997,
"count": 1,
"self": 0.43430491700019047,
"children": {
"run_training.setup": {
"total": 0.0757888709999861,
"count": 1,
"self": 0.0757888709999861
},
"TrainerController.start_learning": {
"total": 2503.1882284709995,
"count": 1,
"self": 4.855238051993638,
"children": {
"TrainerController._reset_env": {
"total": 4.193587248000085,
"count": 1,
"self": 4.193587248000085
},
"TrainerController.advance": {
"total": 2494.027991920005,
"count": 232095,
"self": 4.9839214308181,
"children": {
"env_step": {
"total": 1996.6991066640403,
"count": 232095,
"self": 1652.7253726701188,
"children": {
"SubprocessEnvManager._take_step": {
"total": 340.753420066859,
"count": 232095,
"self": 17.425684225772557,
"children": {
"TorchPolicy.evaluate": {
"total": 323.32773584108645,
"count": 222978,
"self": 323.32773584108645
}
}
},
"workers": {
"total": 3.2203139270625343,
"count": 232095,
"self": 0.0,
"children": {
"worker_root": {
"total": 2495.6089489699184,
"count": 232095,
"is_parallel": true,
"self": 1148.5556726648247,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001050991000056456,
"count": 1,
"is_parallel": true,
"self": 0.0003031190001365758,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0007478719999198802,
"count": 2,
"is_parallel": true,
"self": 0.0007478719999198802
}
}
},
"UnityEnvironment.step": {
"total": 0.03210064399991097,
"count": 1,
"is_parallel": true,
"self": 0.000319063999768332,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00022948300011194078,
"count": 1,
"is_parallel": true,
"self": 0.00022948300011194078
},
"communicator.exchange": {
"total": 0.030804017000036765,
"count": 1,
"is_parallel": true,
"self": 0.030804017000036765
},
"steps_from_proto": {
"total": 0.0007480799999939336,
"count": 1,
"is_parallel": true,
"self": 0.00018958399982693663,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.000558496000166997,
"count": 2,
"is_parallel": true,
"self": 0.000558496000166997
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1347.0532763050937,
"count": 232094,
"is_parallel": true,
"self": 42.79424322732552,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 85.4027541489802,
"count": 232094,
"is_parallel": true,
"self": 85.4027541489802
},
"communicator.exchange": {
"total": 1122.3223586119213,
"count": 232094,
"is_parallel": true,
"self": 1122.3223586119213
},
"steps_from_proto": {
"total": 96.53392031686667,
"count": 232094,
"is_parallel": true,
"self": 33.92946340492699,
"children": {
"_process_rank_one_or_two_observation": {
"total": 62.60445691193968,
"count": 464188,
"is_parallel": true,
"self": 62.60445691193968
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 492.3449638251468,
"count": 232095,
"self": 7.657434308235452,
"children": {
"process_trajectory": {
"total": 154.42602840791392,
"count": 232095,
"self": 153.21933608491327,
"children": {
"RLTrainer._checkpoint": {
"total": 1.2066923230006523,
"count": 10,
"self": 1.2066923230006523
}
}
},
"_update_policy": {
"total": 330.26150110899744,
"count": 97,
"self": 264.82815544999517,
"children": {
"TorchPPOOptimizer.update": {
"total": 65.43334565900227,
"count": 2910,
"self": 65.43334565900227
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.924000677419826e-06,
"count": 1,
"self": 4.924000677419826e-06
},
"TrainerController._save_models": {
"total": 0.1114063269997132,
"count": 1,
"self": 0.0019295629990665475,
"children": {
"RLTrainer._checkpoint": {
"total": 0.10947676400064665,
"count": 1,
"self": 0.10947676400064665
}
}
}
}
}
}
}