{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.7178486585617065,
"min": 0.6976361274719238,
"max": 2.878765821456909,
"count": 200
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 7856.853515625,
"min": 4877.86669921875,
"max": 32109.75390625,
"count": 200
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 200
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 10945.0,
"min": 6567.0,
"max": 13134.0,
"count": 200
},
"SnowballTarget.Step.mean": {
"value": 1999800.0,
"min": 9800.0,
"max": 1999800.0,
"count": 200
},
"SnowballTarget.Step.sum": {
"value": 1999800.0,
"min": 9800.0,
"max": 1999800.0,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 13.986989974975586,
"min": 0.19897232949733734,
"max": 14.026093482971191,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 699.3494873046875,
"min": 9.74964427947998,
"max": 701.3046875,
"count": 200
},
"SnowballTarget.Policy.CuriosityValueEstimate.mean": {
"value": 3.012399673461914,
"min": 1.0613294839859009,
"max": 5.97833251953125,
"count": 200
},
"SnowballTarget.Policy.CuriosityValueEstimate.sum": {
"value": 150.61997985839844,
"min": 52.00514602661133,
"max": 298.9166259765625,
"count": 200
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 27.6,
"min": 3.0408163265306123,
"max": 27.76,
"count": 200
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 1380.0,
"min": 149.0,
"max": 1388.0,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 27.6,
"min": 3.0408163265306123,
"max": 27.76,
"count": 200
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 1380.0,
"min": 149.0,
"max": 1388.0,
"count": 200
},
"SnowballTarget.Policy.CuriosityReward.mean": {
"value": 6.236970176696778,
"min": 5.940337533950806,
"max": 14.174212512969971,
"count": 200
},
"SnowballTarget.Policy.CuriosityReward.sum": {
"value": 311.84850883483887,
"min": 297.0168766975403,
"max": 708.7106256484985,
"count": 200
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.07039043672990362,
"min": 0.060568576078295855,
"max": 0.07814660576781642,
"count": 200
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.21117131018971086,
"min": 0.1290437142443543,
"max": 0.37781506676544147,
"count": 200
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.09840465098165484,
"min": 0.08509081781481738,
"max": 0.21042664999178812,
"count": 200
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.2952139529449645,
"min": 0.17018163562963476,
"max": 1.0369116450641669,
"count": 200
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 9.500996833333289e-07,
"min": 9.500996833333289e-07,
"max": 0.000299175000275,
"count": 200
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 2.8502990499999867e-06,
"min": 2.8502990499999867e-06,
"max": 0.0014884500038499998,
"count": 200
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10031666666666668,
"min": 0.10031666666666668,
"max": 0.199725,
"count": 200
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.30095000000000005,
"min": 0.23251,
"max": 0.9961500000000001,
"count": 200
},
"SnowballTarget.Policy.Beta.mean": {
"value": 2.580166666666659e-05,
"min": 2.580166666666659e-05,
"max": 0.0049862775,
"count": 200
},
"SnowballTarget.Policy.Beta.sum": {
"value": 7.740499999999976e-05,
"min": 7.740499999999976e-05,
"max": 0.024807885,
"count": 200
},
"SnowballTarget.Losses.CuriosityForwardLoss.mean": {
"value": 0.0300291647356818,
"min": 0.02994706286665271,
"max": 0.11994106799144957,
"count": 200
},
"SnowballTarget.Losses.CuriosityForwardLoss.sum": {
"value": 0.0900874942070454,
"min": 0.06337820248205481,
"max": 0.4797642719657983,
"count": 200
},
"SnowballTarget.Losses.CuriosityInverseLoss.mean": {
"value": 0.5733519672973609,
"min": 0.5513428182773341,
"max": 2.7830430374426,
"count": 200
},
"SnowballTarget.Losses.CuriosityInverseLoss.sum": {
"value": 1.7200559018920827,
"min": 1.1697937635240736,
"max": 13.01859488206751,
"count": 200
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 200
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1707215110",
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget-ppo --no-graphics --force",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.0+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1707219958"
},
"total": 4847.346488092001,
"count": 1,
"self": 0.4812986440010718,
"children": {
"run_training.setup": {
"total": 0.07000914399941394,
"count": 1,
"self": 0.07000914399941394
},
"TrainerController.start_learning": {
"total": 4846.7951803040005,
"count": 1,
"self": 6.687735729853557,
"children": {
"TrainerController._reset_env": {
"total": 2.601962673000344,
"count": 1,
"self": 2.601962673000344
},
"TrainerController.advance": {
"total": 4837.395461339146,
"count": 182018,
"self": 3.22620029013342,
"children": {
"env_step": {
"total": 4834.169261049013,
"count": 182018,
"self": 2938.48247238571,
"children": {
"SubprocessEnvManager._take_step": {
"total": 1892.3388483431345,
"count": 182018,
"self": 16.908851210350804,
"children": {
"TorchPolicy.evaluate": {
"total": 1875.4299971327837,
"count": 182018,
"self": 1875.4299971327837
}
}
},
"workers": {
"total": 3.3479403201681635,
"count": 182018,
"self": 0.0,
"children": {
"worker_root": {
"total": 4832.826190093931,
"count": 182018,
"is_parallel": true,
"self": 2419.7278922478063,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0029851570006940165,
"count": 1,
"is_parallel": true,
"self": 0.0008539790023860405,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002131177998307976,
"count": 10,
"is_parallel": true,
"self": 0.002131177998307976
}
}
},
"UnityEnvironment.step": {
"total": 0.038710275999619626,
"count": 1,
"is_parallel": true,
"self": 0.0006204909996085917,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0004150790000494453,
"count": 1,
"is_parallel": true,
"self": 0.0004150790000494453
},
"communicator.exchange": {
"total": 0.03580314400005591,
"count": 1,
"is_parallel": true,
"self": 0.03580314400005591
},
"steps_from_proto": {
"total": 0.0018715619999056798,
"count": 1,
"is_parallel": true,
"self": 0.0003699810004036408,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001501580999502039,
"count": 10,
"is_parallel": true,
"self": 0.001501580999502039
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2413.0982978461243,
"count": 182017,
"is_parallel": true,
"self": 109.81895432847523,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 54.1800924606805,
"count": 182017,
"is_parallel": true,
"self": 54.1800924606805
},
"communicator.exchange": {
"total": 1900.838314420108,
"count": 182017,
"is_parallel": true,
"self": 1900.838314420108
},
"steps_from_proto": {
"total": 348.2609366368606,
"count": 182017,
"is_parallel": true,
"self": 67.71488828660858,
"children": {
"_process_rank_one_or_two_observation": {
"total": 280.546048350252,
"count": 1820170,
"is_parallel": true,
"self": 280.546048350252
}
}
}
}
}
}
}
}
}
}
}
}
},
"trainer_threads": {
"total": 0.001056101000358467,
"count": 1,
"self": 0.001056101000358467,
"children": {
"thread_root": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"trainer_advance": {
"total": 4843.667605999015,
"count": 44043,
"is_parallel": true,
"self": 3.7794931780053957,
"children": {
"process_trajectory": {
"total": 614.1665439580274,
"count": 44043,
"is_parallel": true,
"self": 606.9923408430286,
"children": {
"RLTrainer._checkpoint": {
"total": 7.174203114998818,
"count": 40,
"is_parallel": true,
"self": 7.174203114998818
}
}
},
"_update_policy": {
"total": 4225.721568862982,
"count": 767,
"is_parallel": true,
"self": 2335.95828481279,
"children": {
"TorchPPOOptimizer.update": {
"total": 1889.763284050192,
"count": 46299,
"is_parallel": true,
"self": 1889.763284050192
}
}
}
}
}
}
}
}
},
"TrainerController._save_models": {
"total": 0.10896446100014145,
"count": 1,
"self": 0.0013898779998271493,
"children": {
"RLTrainer._checkpoint": {
"total": 0.1075745830003143,
"count": 1,
"self": 0.1075745830003143
}
}
}
}
}
}
}