{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 1.8568122386932373,
"min": 1.8568122386932373,
"max": 2.8903470039367676,
"count": 50
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 17810.54296875,
"min": 17775.48046875,
"max": 29759.01171875,
"count": 50
},
"SnowballTarget.Step.mean": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Step.sum": {
"value": 499976.0,
"min": 9952.0,
"max": 499976.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 6.018762111663818,
"min": -0.017574388533830643,
"max": 6.018762111663818,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 1233.84619140625,
"min": -3.602749824523926,
"max": 1233.84619140625,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 50
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 10945.0,
"min": 8756.0,
"max": 10945.0,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 17.672727272727272,
"min": 2.727272727272727,
"max": 18.181818181818183,
"count": 50
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 972.0,
"min": 120.0,
"max": 972.0,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 17.672727272727272,
"min": 2.727272727272727,
"max": 18.181818181818183,
"count": 50
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 972.0,
"min": 120.0,
"max": 972.0,
"count": 50
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 50
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.035475623733418615,
"min": 0.03180414954922147,
"max": 0.03817954923768715,
"count": 15
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.035475623733418615,
"min": 0.03180414954922147,
"max": 0.03817954923768715,
"count": 15
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.3183900256020327,
"min": 0.08703206747304648,
"max": 0.3183900256020327,
"count": 15
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.3183900256020327,
"min": 0.08703206747304648,
"max": 0.3183900256020327,
"count": 15
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 1.0176989823999964e-06,
"min": 1.0176989823999964e-06,
"max": 9.341760658240001e-05,
"count": 15
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 1.0176989823999964e-06,
"min": 1.0176989823999964e-06,
"max": 9.341760658240001e-05,
"count": 15
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10101760000000003,
"min": 0.10101760000000003,
"max": 0.19341760000000005,
"count": 15
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.10101760000000003,
"min": 0.10101760000000003,
"max": 0.19341760000000005,
"count": 15
},
"SnowballTarget.Policy.Beta.mean": {
"value": 6.077823999999984e-05,
"min": 6.077823999999984e-05,
"max": 0.004671538240000001,
"count": 15
},
"SnowballTarget.Policy.Beta.sum": {
"value": 6.077823999999984e-05,
"min": 6.077823999999984e-05,
"max": 0.004671538240000001,
"count": 15
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1760689714",
"python_version": "3.10.12 (main, Aug 15 2025, 14:32:43) [GCC 11.4.0]",
"command_line_arguments": "/local_scratch/ahmohame/MyRLProj/.venv/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --force",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.3.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1760690960"
},
"total": 1246.1255109719932,
"count": 1,
"self": 0.374010790954344,
"children": {
"run_training.setup": {
"total": 0.02128842705860734,
"count": 1,
"self": 0.02128842705860734
},
"TrainerController.start_learning": {
"total": 1245.7302117539803,
"count": 1,
"self": 0.6739412506576627,
"children": {
"TrainerController._reset_env": {
"total": 2.9158263229765,
"count": 1,
"self": 2.9158263229765
},
"TrainerController.advance": {
"total": 1242.0460719174007,
"count": 45464,
"self": 0.6814981006318703,
"children": {
"env_step": {
"total": 1054.3282872018171,
"count": 45464,
"self": 919.0131197725423,
"children": {
"SubprocessEnvManager._take_step": {
"total": 134.8636818312807,
"count": 45464,
"self": 2.2304799418197945,
"children": {
"TorchPolicy.evaluate": {
"total": 132.6332018894609,
"count": 45464,
"self": 132.6332018894609
}
}
},
"workers": {
"total": 0.45148559799417853,
"count": 45464,
"self": 0.0,
"children": {
"worker_root": {
"total": 1243.1112274315674,
"count": 45464,
"is_parallel": true,
"self": 398.501711131772,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.00440782296936959,
"count": 1,
"is_parallel": true,
"self": 0.0011371220462024212,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.003270700923167169,
"count": 10,
"is_parallel": true,
"self": 0.003270700923167169
}
}
},
"UnityEnvironment.step": {
"total": 0.04261121794115752,
"count": 1,
"is_parallel": true,
"self": 0.0009077248396351933,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0006594400620087981,
"count": 1,
"is_parallel": true,
"self": 0.0006594400620087981
},
"communicator.exchange": {
"total": 0.03817833599168807,
"count": 1,
"is_parallel": true,
"self": 0.03817833599168807
},
"steps_from_proto": {
"total": 0.0028657170478254557,
"count": 1,
"is_parallel": true,
"self": 0.0005921600386500359,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00227355700917542,
"count": 10,
"is_parallel": true,
"self": 0.00227355700917542
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 844.6095162997954,
"count": 45463,
"is_parallel": true,
"self": 35.7958256395068,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 21.017438556067646,
"count": 45463,
"is_parallel": true,
"self": 21.017438556067646
},
"communicator.exchange": {
"total": 676.2517781199422,
"count": 45463,
"is_parallel": true,
"self": 676.2517781199422
},
"steps_from_proto": {
"total": 111.54447398427874,
"count": 45463,
"is_parallel": true,
"self": 21.141052892548032,
"children": {
"_process_rank_one_or_two_observation": {
"total": 90.4034210917307,
"count": 454630,
"is_parallel": true,
"self": 90.4034210917307
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 187.03628661495168,
"count": 45464,
"self": 0.7960933083668351,
"children": {
"process_trajectory": {
"total": 50.32543419068679,
"count": 45464,
"self": 49.403155541745946,
"children": {
"RLTrainer._checkpoint": {
"total": 0.9222786489408463,
"count": 10,
"self": 0.9222786489408463
}
}
},
"_update_policy": {
"total": 135.91475911589805,
"count": 15,
"self": 79.53988587856293,
"children": {
"TorchPPOOptimizer.update": {
"total": 56.37487323733512,
"count": 2880,
"self": 56.37487323733512
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.719507604837418e-07,
"count": 1,
"self": 8.719507604837418e-07
},
"TrainerController._save_models": {
"total": 0.0943713909946382,
"count": 1,
"self": 0.0012160909827798605,
"children": {
"RLTrainer._checkpoint": {
"total": 0.09315530001185834,
"count": 1,
"self": 0.09315530001185834
}
}
}
}
}
}
}