{
"name": "root",
"gauges": {
"Huggy.Policy.Entropy.mean": {
"value": 1.4034281969070435,
"min": 1.4034281969070435,
"max": 1.4254093170166016,
"count": 40
},
"Huggy.Policy.Entropy.sum": {
"value": 70506.828125,
"min": 67530.0,
"max": 77140.40625,
"count": 40
},
"Huggy.Environment.EpisodeLength.mean": {
"value": 79.13001605136436,
"min": 75.60736196319019,
"max": 396.07936507936506,
"count": 40
},
"Huggy.Environment.EpisodeLength.sum": {
"value": 49298.0,
"min": 48950.0,
"max": 49910.0,
"count": 40
},
"Huggy.Step.mean": {
"value": 1999978.0,
"min": 49297.0,
"max": 1999978.0,
"count": 40
},
"Huggy.Step.sum": {
"value": 1999978.0,
"min": 49297.0,
"max": 1999978.0,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.mean": {
"value": 2.547334909439087,
"min": 0.03736720234155655,
"max": 2.547334909439087,
"count": 40
},
"Huggy.Policy.ExtrinsicValueEstimate.sum": {
"value": 1586.9896240234375,
"min": 4.670900344848633,
"max": 1615.748779296875,
"count": 40
},
"Huggy.Environment.CumulativeReward.mean": {
"value": 4.024332938569315,
"min": 1.6354788365364075,
"max": 4.024332938569315,
"count": 40
},
"Huggy.Environment.CumulativeReward.sum": {
"value": 2507.1594207286835,
"min": 204.43485456705093,
"max": 2530.5656306147575,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.mean": {
"value": 4.024332938569315,
"min": 1.6354788365364075,
"max": 4.024332938569315,
"count": 40
},
"Huggy.Policy.ExtrinsicReward.sum": {
"value": 2507.1594207286835,
"min": 204.43485456705093,
"max": 2530.5656306147575,
"count": 40
},
"Huggy.Losses.PolicyLoss.mean": {
"value": 0.01572424361736719,
"min": 0.013764370328378087,
"max": 0.02050111532056083,
"count": 40
},
"Huggy.Losses.PolicyLoss.sum": {
"value": 0.04717273085210157,
"min": 0.027528740656756173,
"max": 0.05341944813311178,
"count": 40
},
"Huggy.Losses.ValueLoss.mean": {
"value": 0.06232480626139376,
"min": 0.02198513010516763,
"max": 0.06592570630212624,
"count": 40
},
"Huggy.Losses.ValueLoss.sum": {
"value": 0.18697441878418128,
"min": 0.04397026021033526,
"max": 0.19777711890637872,
"count": 40
},
"Huggy.Policy.LearningRate.mean": {
"value": 3.6188487937499925e-06,
"min": 3.6188487937499925e-06,
"max": 0.0002953074765641749,
"count": 40
},
"Huggy.Policy.LearningRate.sum": {
"value": 1.0856546381249978e-05,
"min": 1.0856546381249978e-05,
"max": 0.0008438973187008998,
"count": 40
},
"Huggy.Policy.Epsilon.mean": {
"value": 0.10120625000000001,
"min": 0.10120625000000001,
"max": 0.19843582500000007,
"count": 40
},
"Huggy.Policy.Epsilon.sum": {
"value": 0.30361875000000005,
"min": 0.20754604999999998,
"max": 0.5812991000000002,
"count": 40
},
"Huggy.Policy.Beta.mean": {
"value": 7.019187499999988e-05,
"min": 7.019187499999988e-05,
"max": 0.004921947667499999,
"count": 40
},
"Huggy.Policy.Beta.sum": {
"value": 0.00021057562499999965,
"min": 0.00021057562499999965,
"max": 0.014066825089999999,
"count": 40
},
"Huggy.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
},
"Huggy.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 40
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1742524218",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/home/samrito/anaconda3/envs/drl_hf/bin/mlagents-learn ./ml-agents/config/ppo/Huggy.yaml --env=./trained-envs-executables/linux/Huggy/Huggy --run-id=Huggy --no-graphics --force",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.6.0+cu124",
"numpy_version": "1.23.5",
"end_time_seconds": "1742525039"
},
"total": 820.1580611271784,
"count": 1,
"self": 0.16697772918269038,
"children": {
"run_training.setup": {
"total": 0.008863725000992417,
"count": 1,
"self": 0.008863725000992417
},
"TrainerController.start_learning": {
"total": 819.9822196729947,
"count": 1,
"self": 1.8928601217921823,
"children": {
"TrainerController._reset_env": {
"total": 1.0225368258543313,
"count": 1,
"self": 1.0225368258543313
},
"TrainerController.advance": {
"total": 817.0091294022277,
"count": 233093,
"self": 1.786020978121087,
"children": {
"env_step": {
"total": 633.5002282338683,
"count": 233093,
"self": 488.46714628860354,
"children": {
"SubprocessEnvManager._take_step": {
"total": 143.8156062825583,
"count": 233093,
"self": 6.789004281628877,
"children": {
"TorchPolicy.evaluate": {
"total": 137.02660200092942,
"count": 222956,
"self": 137.02660200092942
}
}
},
"workers": {
"total": 1.2174756627064198,
"count": 233093,
"self": 0.0,
"children": {
"worker_root": {
"total": 817.54456645553,
"count": 233093,
"is_parallel": true,
"self": 436.1020396091044,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0005594110116362572,
"count": 1,
"is_parallel": true,
"self": 0.00010283198207616806,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0004565790295600891,
"count": 2,
"is_parallel": true,
"self": 0.0004565790295600891
}
}
},
"UnityEnvironment.step": {
"total": 0.010016812942922115,
"count": 1,
"is_parallel": true,
"self": 9.863288141787052e-05,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 9.079813025891781e-05,
"count": 1,
"is_parallel": true,
"self": 9.079813025891781e-05
},
"communicator.exchange": {
"total": 0.009559697937220335,
"count": 1,
"is_parallel": true,
"self": 0.009559697937220335
},
"steps_from_proto": {
"total": 0.000267683994024992,
"count": 1,
"is_parallel": true,
"self": 6.234901957213879e-05,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0002053349744528532,
"count": 2,
"is_parallel": true,
"self": 0.0002053349744528532
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 381.4425268464256,
"count": 233092,
"is_parallel": true,
"self": 10.97539081517607,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 19.231896872166544,
"count": 233092,
"is_parallel": true,
"self": 19.231896872166544
},
"communicator.exchange": {
"total": 326.857640276663,
"count": 233092,
"is_parallel": true,
"self": 326.857640276663
},
"steps_from_proto": {
"total": 24.37759888241999,
"count": 233092,
"is_parallel": true,
"self": 8.469585163053125,
"children": {
"_process_rank_one_or_two_observation": {
"total": 15.908013719366863,
"count": 466184,
"is_parallel": true,
"self": 15.908013719366863
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 181.72288019023836,
"count": 233093,
"self": 2.791784961009398,
"children": {
"process_trajectory": {
"total": 65.78023722278886,
"count": 233093,
"self": 65.20290142251179,
"children": {
"RLTrainer._checkpoint": {
"total": 0.5773358002770692,
"count": 10,
"self": 0.5773358002770692
}
}
},
"_update_policy": {
"total": 113.1508580064401,
"count": 97,
"self": 93.06675824220292,
"children": {
"TorchPPOOptimizer.update": {
"total": 20.08409976423718,
"count": 2910,
"self": 20.08409976423718
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.920883268117905e-07,
"count": 1,
"self": 5.920883268117905e-07
},
"TrainerController._save_models": {
"total": 0.057692731032148004,
"count": 1,
"self": 0.000729620922356844,
"children": {
"RLTrainer._checkpoint": {
"total": 0.05696311010979116,
"count": 1,
"self": 0.05696311010979116
}
}
}
}
}
}
}