ppo-SnowballTarget / run_logs /timers.json

Initial training run

7e916a6 verified 12 months ago

17.6 kB

	{
	"name": "root",
	"gauges": {
	"SnowballTarget.Policy.Entropy.mean": {
	"value": 0.9595369696617126,
	"min": 0.9365046620368958,
	"max": 2.849811315536499,
	"count": 20
	},
	"SnowballTarget.Policy.Entropy.sum": {
	"value": 9119.439453125,
	"min": 9119.439453125,
	"max": 29090.875,
	"count": 20
	},
	"SnowballTarget.Step.mean": {
	"value": 199984.0,
	"min": 9952.0,
	"max": 199984.0,
	"count": 20
	},
	"SnowballTarget.Step.sum": {
	"value": 199984.0,
	"min": 9952.0,
	"max": 199984.0,
	"count": 20
	},
	"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
	"value": 13.091958045959473,
	"min": 0.5954446792602539,
	"max": 13.091958045959473,
	"count": 20
	},
	"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
	"value": 2552.931884765625,
	"min": 115.51626586914062,
	"max": 2651.05078125,
	"count": 20
	},
	"SnowballTarget.Losses.PolicyLoss.mean": {
	"value": 0.06994937533425971,
	"min": 0.06454695140393382,
	"max": 0.07801521769298395,
	"count": 20
	},
	"SnowballTarget.Losses.PolicyLoss.sum": {
	"value": 0.27979750133703885,
	"min": 0.2581878056157353,
	"max": 0.3806577473882605,
	"count": 20
	},
	"SnowballTarget.Losses.ValueLoss.mean": {
	"value": 0.2034607178294191,
	"min": 0.14633530290687785,
	"max": 0.30647770762443544,
	"count": 20
	},
	"SnowballTarget.Losses.ValueLoss.sum": {
	"value": 0.8138428713176764,
	"min": 0.5853412116275114,
	"max": 1.5323885381221771,
	"count": 20
	},
	"SnowballTarget.Policy.LearningRate.mean": {
	"value": 8.082097306000005e-06,
	"min": 8.082097306000005e-06,
	"max": 0.000291882002706,
	"count": 20
	},
	"SnowballTarget.Policy.LearningRate.sum": {
	"value": 3.232838922400002e-05,
	"min": 3.232838922400002e-05,
	"max": 0.00138516003828,
	"count": 20
	},
	"SnowballTarget.Policy.Epsilon.mean": {
	"value": 0.10269400000000001,
	"min": 0.10269400000000001,
	"max": 0.19729400000000002,
	"count": 20
	},
	"SnowballTarget.Policy.Epsilon.sum": {
	"value": 0.41077600000000003,
	"min": 0.41077600000000003,
	"max": 0.96172,
	"count": 20
	},
	"SnowballTarget.Policy.Beta.mean": {
	"value": 0.0001444306000000001,
	"min": 0.0001444306000000001,
	"max": 0.0048649706,
	"count": 20
	},
	"SnowballTarget.Policy.Beta.sum": {
	"value": 0.0005777224000000004,
	"min": 0.0005777224000000004,
	"max": 0.023089828,
	"count": 20
	},
	"SnowballTarget.Environment.EpisodeLength.mean": {
	"value": 199.0,
	"min": 199.0,
	"max": 199.0,
	"count": 20
	},
	"SnowballTarget.Environment.EpisodeLength.sum": {
	"value": 8756.0,
	"min": 8756.0,
	"max": 10945.0,
	"count": 20
	},
	"SnowballTarget.Environment.CumulativeReward.mean": {
	"value": 25.704545454545453,
	"min": 3.977272727272727,
	"max": 25.704545454545453,
	"count": 20
	},
	"SnowballTarget.Environment.CumulativeReward.sum": {
	"value": 1131.0,
	"min": 175.0,
	"max": 1413.0,
	"count": 20
	},
	"SnowballTarget.Policy.ExtrinsicReward.mean": {
	"value": 25.704545454545453,
	"min": 3.977272727272727,
	"max": 25.704545454545453,
	"count": 20
	},
	"SnowballTarget.Policy.ExtrinsicReward.sum": {
	"value": 1131.0,
	"min": 175.0,
	"max": 1413.0,
	"count": 20
	},
	"SnowballTarget.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 20
	},
	"SnowballTarget.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 20
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1741520369",
	"python_version": "3.10.12 (main, Feb 4 2025, 14:57:36) [GCC 11.4.0]",
	"command_line_arguments": "/home/jonathan/projects/hf-deep-rl/.venv/bin/mlagents-learn ./SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
	"mlagents_version": "1.1.0",
	"mlagents_envs_version": "1.1.0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.6.0+cu124",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1741520690"
	},
	"total": 321.279661551,
	"count": 1,
	"self": 0.21770687199989425,
	"children": {
	"run_training.setup": {
	"total": 0.013230865000082304,
	"count": 1,
	"self": 0.013230865000082304
	},
	"TrainerController.start_learning": {
	"total": 321.048723814,
	"count": 1,
	"self": 0.2430655300044009,
	"children": {
	"TrainerController._reset_env": {
	"total": 1.746840921999933,
	"count": 1,
	"self": 1.746840921999933
	},
	"TrainerController.advance": {
	"total": 319.00256912299574,
	"count": 18192,
	"self": 0.21756725699822255,
	"children": {
	"env_step": {
	"total": 228.88326697300442,
	"count": 18192,
	"self": 156.16429960900803,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 72.56693605799683,
	"count": 18192,
	"self": 0.7962439469933997,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 71.77069211100343,
	"count": 18192,
	"self": 71.77069211100343
	}
	}
	},
	"workers": {
	"total": 0.1520313059995715,
	"count": 18192,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 320.3358378189986,
	"count": 18192,
	"is_parallel": true,
	"self": 178.6593286350069,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.0016249419999212478,
	"count": 1,
	"is_parallel": true,
	"self": 0.0007073359998912565,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0009176060000299913,
	"count": 10,
	"is_parallel": true,
	"self": 0.0009176060000299913
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.01570811799990679,
	"count": 1,
	"is_parallel": true,
	"self": 0.00015677299984417914,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00012581700002556317,
	"count": 1,
	"is_parallel": true,
	"self": 0.00012581700002556317
	},
	"communicator.exchange": {
	"total": 0.01490038999997978,
	"count": 1,
	"is_parallel": true,
	"self": 0.01490038999997978
	},
	"steps_from_proto": {
	"total": 0.0005251380000572681,
	"count": 1,
	"is_parallel": true,
	"self": 0.00011398699996334472,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.00041115100009392336,
	"count": 10,
	"is_parallel": true,
	"self": 0.00041115100009392336
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 141.6765091839917,
	"count": 18191,
	"is_parallel": true,
	"self": 2.9389007209972533,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 1.688320935003162,
	"count": 18191,
	"is_parallel": true,
	"self": 1.688320935003162
	},
	"communicator.exchange": {
	"total": 128.75629966399913,
	"count": 18191,
	"is_parallel": true,
	"self": 128.75629966399913
	},
	"steps_from_proto": {
	"total": 8.292987863992153,
	"count": 18191,
	"is_parallel": true,
	"self": 1.7572974540063342,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 6.535690409985818,
	"count": 181910,
	"is_parallel": true,
	"self": 6.535690409985818
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 89.90173489299309,
	"count": 18192,
	"self": 0.2946262619859681,
	"children": {
	"process_trajectory": {
	"total": 19.48712901000738,
	"count": 18192,
	"self": 19.206301670007292,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.28082734000008713,
	"count": 4,
	"self": 0.28082734000008713
	}
	}
	},
	"_update_policy": {
	"total": 70.11997962099974,
	"count": 90,
	"self": 15.347345347997589,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 54.772634273002154,
	"count": 4587,
	"self": 54.772634273002154
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 6.650000159424962e-07,
	"count": 1,
	"self": 6.650000159424962e-07
	},
	"TrainerController._save_models": {
	"total": 0.05624757399993996,
	"count": 1,
	"self": 0.0004357860000254732,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.055811787999914486,
	"count": 1,
	"self": 0.055811787999914486
	}
	}
	}
	}
	}
	}
	}