{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.3998006582260132,
"min": 1.3998006582260132,
"max": 1.4272023439407349,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 69194.9453125,
"min": 67751.78125,
"max": 75544.671875,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 91.47504621072089,
"min": 75.71932515337423,
"max": 417.28333333333336,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 49488.0,
"min": 49241.0,
"max": 50074.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999968.0,
"min": 49444.0,
"max": 1999968.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999968.0,
"min": 49444.0,
"max": 1999968.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.4567933082580566,
"min": 0.07157561182975769,
"max": 2.5135066509246826,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1329.125244140625,
"min": 8.517498016357422,
"max": 1627.0843505859375,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 3.6846552008621796,
"min": 1.8091764585310672,
"max": 3.993220277620931,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 1993.398463666439,
"min": 215.291998565197,
"max": 2545.5754188895226,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 3.6846552008621796,
"min": 1.8091764585310672,
"max": 3.993220277620931,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 1993.398463666439,
"min": 215.291998565197,
"max": 2545.5754188895226,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.016287364680384702,
"min": 0.013577443333876242,
"max": 0.021632525106381056,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.0488620940411541,
"min": 0.027154886667752485,
"max": 0.06489757531914317,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.05415089416007201,
"min": 0.021869140304625036,
"max": 0.06380855329334736,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.16245268248021602,
"min": 0.04373828060925007,
"max": 0.18688746728003025,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.2604989131999965e-06,
"min": 3.2604989131999965e-06,
"max": 0.0002953650765449749,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 9.78149673959999e-06,
"min": 9.78149673959999e-06,
"max": 0.00084419506860165,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10108679999999998,
"min": 0.10108679999999998,
"max": 0.19845502500000006,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30326039999999993,
"min": 0.20735050000000002,
"max": 0.58139835,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 6.423131999999996e-05,
"min": 6.423131999999996e-05,
"max": 0.004922905747499999,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00019269395999999987,
"min": 0.00019269395999999987,
"max": 0.014071777665,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1701125885",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1701128492"
},
"total": 2606.331602055,
"count": 1,
"self": 0.4386316960003569,
"children": {
"run_training.setup": {
"total": 0.0578761230000282,
"count": 1,
"self": 0.0578761230000282
},
"TrainerController.start_learning": {
"total": 2605.835094236,
"count": 1,
"self": 4.7164029870650666,
"children": {
"TrainerController._reset_env": {
"total": 3.4524357049999708,
"count": 1,
"self": 3.4524357049999708
},
"TrainerController.advance": {
"total": 2597.5551381679347,
"count": 232619,
"self": 5.248476828915045,
"children": {
"env_step": {
"total": 2064.1013671130204,
"count": 232619,
"self": 1706.2973381051193,
"children": {
"SubprocessEnvManager._take_step": {
"total": 354.51953024594786,
"count": 232619,
"self": 19.585398118034277,
"children": {
"TorchPolicy.evaluate": {
"total": 334.9341321279136,
"count": 222983,
"self": 334.9341321279136
}
}
},
"workers": {
"total": 3.284498761953273,
"count": 232619,
"self": 0.0,
"children": {
"worker_root": {
"total": 2597.839221838983,
"count": 232619,
"is_parallel": true,
"self": 1217.5345810150093,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0009352030000400191,
"count": 1,
"is_parallel": true,
"self": 0.0002698069999951258,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0006653960000448933,
"count": 2,
"is_parallel": true,
"self": 0.0006653960000448933
}
}
},
"UnityEnvironment.step": {
"total": 0.03299833299990951,
"count": 1,
"is_parallel": true,
"self": 0.00032553699998061347,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00024198099993100186,
"count": 1,
"is_parallel": true,
"self": 0.00024198099993100186
},
"communicator.exchange": {
"total": 0.03167649299996356,
"count": 1,
"is_parallel": true,
"self": 0.03167649299996356
},
"steps_from_proto": {
"total": 0.0007543220000343354,
"count": 1,
"is_parallel": true,
"self": 0.00022837900007743883,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0005259429999568965,
"count": 2,
"is_parallel": true,
"self": 0.0005259429999568965
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1380.3046408239738,
"count": 232618,
"is_parallel": true,
"self": 42.508222180951634,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 92.56040768593789,
"count": 232618,
"is_parallel": true,
"self": 92.56040768593789
},
"communicator.exchange": {
"total": 1148.6785350799987,
"count": 232618,
"is_parallel": true,
"self": 1148.6785350799987
},
"steps_from_proto": {
"total": 96.5574758770855,
"count": 232618,
"is_parallel": true,
"self": 36.48635192488757,
"children": {
"_process_rank_one_or_two_observation": {
"total": 60.07112395219792,
"count": 465236,
"is_parallel": true,
"self": 60.07112395219792
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 528.2052942259994,
"count": 232619,
"self": 7.551763565067631,
"children": {
"process_trajectory": {
"total": 169.90018258292946,
"count": 232619,
"self": 168.4146443069301,
"children": {
"RLTrainer._checkpoint": {
"total": 1.4855382759993745,
"count": 10,
"self": 1.4855382759993745
}
}
},
"_update_policy": {
"total": 350.7533480780023,
"count": 97,
"self": 285.68095214200696,
"children": {
"TorchPPOOptimizer.update": {
"total": 65.07239593599536,
"count": 2910,
"self": 65.07239593599536
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.0290000318491366e-06,
"count": 1,
"self": 1.0290000318491366e-06
},
"TrainerController._save_models": {
"total": 0.1111163470000065,
"count": 1,
"self": 0.0018486520002625184,
"children": {
"RLTrainer._checkpoint": {
"total": 0.10926769499974398,
"count": 1,
"self": 0.10926769499974398
}
}
}
}
}
}
}