{
"name": "root",
"gauges": {
"SnowballTarget.Policy.Entropy.mean": {
"value": 0.7844546437263489,
"min": 0.7844546437263489,
"max": 2.7944178581237793,
"count": 20
},
"SnowballTarget.Policy.Entropy.sum": {
"value": 39141.1484375,
"min": 39141.1484375,
"max": 141151.640625,
"count": 20
},
"SnowballTarget.Step.mean": {
"value": 999952.0,
"min": 49936.0,
"max": 999952.0,
"count": 20
},
"SnowballTarget.Step.sum": {
"value": 999952.0,
"min": 49936.0,
"max": 999952.0,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.mean": {
"value": 14.047369956970215,
"min": 0.446770042181015,
"max": 14.047369956970215,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicValueEstimate.sum": {
"value": 14075.46484375,
"min": 443.1958923339844,
"max": 14075.46484375,
"count": 20
},
"SnowballTarget.Environment.EpisodeLength.mean": {
"value": 199.0,
"min": 199.0,
"max": 199.0,
"count": 20
},
"SnowballTarget.Environment.EpisodeLength.sum": {
"value": 50347.0,
"min": 48158.0,
"max": 50347.0,
"count": 20
},
"SnowballTarget.Environment.CumulativeReward.mean": {
"value": 27.73913043478261,
"min": 5.037190082644628,
"max": 27.73913043478261,
"count": 20
},
"SnowballTarget.Environment.CumulativeReward.sum": {
"value": 7018.0,
"min": 1219.0,
"max": 7018.0,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicReward.mean": {
"value": 27.73913043478261,
"min": 5.037190082644628,
"max": 27.73913043478261,
"count": 20
},
"SnowballTarget.Policy.ExtrinsicReward.sum": {
"value": 7018.0,
"min": 1219.0,
"max": 7018.0,
"count": 20
},
"SnowballTarget.Losses.PolicyLoss.mean": {
"value": 0.024471899460884745,
"min": 0.02110136260528634,
"max": 0.02561229946569074,
"count": 20
},
"SnowballTarget.Losses.PolicyLoss.sum": {
"value": 0.12235949730442372,
"min": 0.08440545042114536,
"max": 0.1280614973284537,
"count": 20
},
"SnowballTarget.Losses.ValueLoss.mean": {
"value": 0.17242038685083388,
"min": 0.15438103044405577,
"max": 0.3238931970596314,
"count": 20
},
"SnowballTarget.Losses.ValueLoss.sum": {
"value": 0.8621019342541695,
"min": 0.6175241217762231,
"max": 1.6194659852981568,
"count": 20
},
"SnowballTarget.Policy.LearningRate.mean": {
"value": 8.380417206560002e-06,
"min": 8.380417206560002e-06,
"max": 0.0002920932026356,
"count": 20
},
"SnowballTarget.Policy.LearningRate.sum": {
"value": 4.190208603280001e-05,
"min": 4.190208603280001e-05,
"max": 0.0013898592367135997,
"count": 20
},
"SnowballTarget.Policy.Epsilon.mean": {
"value": 0.10279344000000001,
"min": 0.10279344000000001,
"max": 0.19736439999999997,
"count": 20
},
"SnowballTarget.Policy.Epsilon.sum": {
"value": 0.5139672000000001,
"min": 0.4299776,
"max": 0.9632863999999999,
"count": 20
},
"SnowballTarget.Policy.Beta.mean": {
"value": 0.00014939265600000004,
"min": 0.00014939265600000004,
"max": 0.00486848356,
"count": 20
},
"SnowballTarget.Policy.Beta.sum": {
"value": 0.0007469632800000002,
"min": 0.0007469632800000002,
"max": 0.02316799136,
"count": 20
},
"SnowballTarget.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 20
},
"SnowballTarget.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 20
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1746497860",
"python_version": "3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]",
"command_line_arguments": "/home/txshi/miniconda3/envs/hf-rl-ch5/bin/mlagents-learn ./ml-agents/config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.7.0+cu126",
"numpy_version": "1.23.5",
"end_time_seconds": "1746498569"
},
"total": 709.1388229509998,
"count": 1,
"self": 0.16646175100004257,
"children": {
"run_training.setup": {
"total": 0.009094104999803676,
"count": 1,
"self": 0.009094104999803676
},
"TrainerController.start_learning": {
"total": 708.963267095,
"count": 1,
"self": 0.6044152279819173,
"children": {
"TrainerController._reset_env": {
"total": 1.3000922839996747,
"count": 1,
"self": 1.3000922839996747
},
"TrainerController.advance": {
"total": 707.0031684790183,
"count": 90928,
"self": 0.5696742239565538,
"children": {
"env_step": {
"total": 506.8617856400333,
"count": 90928,
"self": 299.2319616621412,
"children": {
"SubprocessEnvManager._take_step": {
"total": 207.23350875200776,
"count": 90928,
"self": 2.252718222860949,
"children": {
"TorchPolicy.evaluate": {
"total": 204.98079052914682,
"count": 90928,
"self": 204.98079052914682
}
}
},
"workers": {
"total": 0.39631522588433654,
"count": 90928,
"self": 0.0,
"children": {
"worker_root": {
"total": 707.9836085219881,
"count": 90928,
"is_parallel": true,
"self": 446.9729496589207,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0006692749998364889,
"count": 1,
"is_parallel": true,
"self": 0.00018863699961002567,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0004806380002264632,
"count": 10,
"is_parallel": true,
"self": 0.0004806380002264632
}
}
},
"UnityEnvironment.step": {
"total": 0.009826122000049509,
"count": 1,
"is_parallel": true,
"self": 0.0001313720003963681,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0001350629995613417,
"count": 1,
"is_parallel": true,
"self": 0.0001350629995613417
},
"communicator.exchange": {
"total": 0.009181767999962176,
"count": 1,
"is_parallel": true,
"self": 0.009181767999962176
},
"steps_from_proto": {
"total": 0.0003779190001296229,
"count": 1,
"is_parallel": true,
"self": 8.528900116289151e-05,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00029262999896673136,
"count": 10,
"is_parallel": true,
"self": 0.00029262999896673136
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 261.0106588630674,
"count": 90927,
"is_parallel": true,
"self": 11.083211220931389,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 5.414907688028961,
"count": 90927,
"is_parallel": true,
"self": 5.414907688028961
},
"communicator.exchange": {
"total": 213.8126099880019,
"count": 90927,
"is_parallel": true,
"self": 213.8126099880019
},
"steps_from_proto": {
"total": 30.699929966105174,
"count": 90927,
"is_parallel": true,
"self": 6.000352090147317,
"children": {
"_process_rank_one_or_two_observation": {
"total": 24.699577875957857,
"count": 909270,
"is_parallel": true,
"self": 24.699577875957857
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 199.57170861502846,
"count": 90928,
"self": 0.7239496360166413,
"children": {
"process_trajectory": {
"total": 56.00508639601139,
"count": 90928,
"self": 53.50316079901086,
"children": {
"RLTrainer._checkpoint": {
"total": 2.501925597000536,
"count": 40,
"self": 2.501925597000536
}
}
},
"_update_policy": {
"total": 142.84267258300042,
"count": 95,
"self": 105.54861193698753,
"children": {
"TorchPPOOptimizer.update": {
"total": 37.294060646012895,
"count": 4750,
"self": 37.294060646012895
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.2700003177742474e-07,
"count": 1,
"self": 4.2700003177742474e-07
},
"TrainerController._save_models": {
"total": 0.05559067700005471,
"count": 1,
"self": 0.000957331999870803,
"children": {
"RLTrainer._checkpoint": {
"total": 0.05463334500018391,
"count": 1,
"self": 0.05463334500018391
}
}
}
}
}
}
}