{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.9219416975975037,
"min": 0.9219416975975037,
"max": 1.3308157920837402,
"count": 3
},
"Pyramids.Policy.Entropy.sum": {
"value": 28056.529296875,
"min": 28056.529296875,
"max": 40371.62890625,
"count": 3
},
"Pyramids.Step.mean": {
"value": 89998.0,
"min": 29893.0,
"max": 89998.0,
"count": 3
},
"Pyramids.Step.sum": {
"value": 89998.0,
"min": 29893.0,
"max": 89998.0,
"count": 3
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.05693092569708824,
"min": -0.057289861142635345,
"max": 0.010499139316380024,
"count": 3
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": -13.777283668518066,
"min": -13.86414623260498,
"max": 2.4882960319519043,
"count": 3
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.19356770813465118,
"min": 0.19356770813465118,
"max": 0.652888834476471,
"count": 3
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 46.8433837890625,
"min": 46.8433837890625,
"max": 154.73464965820312,
"count": 3
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.09366758631134031,
"min": 0.09366758631134031,
"max": 0.10365323002324789,
"count": 3
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.1240110357360837,
"min": 0.8292258401859831,
"max": 1.1240110357360837,
"count": 3
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.0011701008608305148,
"min": 0.0011701008608305148,
"max": 0.013040119442371574,
"count": 3
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.014041210329966178,
"min": 0.014041210329966178,
"max": 0.1043209555389726,
"count": 3
},
"Pyramids.Policy.LearningRate.mean": {
"value": 0.00010079474146799999,
"min": 0.00010079474146799999,
"max": 0.00033128851717787496,
"count": 3
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.001209536897616,
"min": 0.001209536897616,
"max": 0.0026503081374229997,
"count": 3
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.12519866666666665,
"min": 0.12519866666666665,
"max": 0.182822125,
"count": 3
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.502384,
"min": 1.462577,
"max": 1.5484120000000001,
"count": 3
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0025273467999999996,
"min": 0.0025273467999999996,
"max": 0.0082839302875,
"count": 3
},
"Pyramids.Policy.Beta.sum": {
"value": 0.030328161599999993,
"min": 0.030328161599999993,
"max": 0.0662714423,
"count": 3
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.10764492303133011,
"min": 0.10764492303133011,
"max": 0.7087070941925049,
"count": 3
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 1.2917391061782837,
"min": 1.2917391061782837,
"max": 5.669656753540039,
"count": 3
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 962.7272727272727,
"min": 959.2941176470588,
"max": 988.5151515151515,
"count": 3
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 31770.0,
"min": 16308.0,
"max": 32621.0,
"count": 3
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": -0.842418231521592,
"min": -0.8682242952512972,
"max": -0.842418231521592,
"count": 3
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": -27.799801640212536,
"min": -28.65140174329281,
"max": -14.324000805616379,
"count": 3
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": -0.842418231521592,
"min": -0.8682242952512972,
"max": -0.842418231521592,
"count": 3
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": -27.799801640212536,
"min": -28.65140174329281,
"max": -14.324000805616379,
"count": 3
},
"Pyramids.Policy.RndReward.mean": {
"value": 1.2287372043638518,
"min": 1.2287372043638518,
"max": 15.582255615907556,
"count": 3
},
"Pyramids.Policy.RndReward.sum": {
"value": 40.54832774400711,
"min": 40.54832774400711,
"max": 264.89834547042847,
"count": 3
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 3
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 3
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1738882241",
"python_version": "3.11.2 (main, Nov 30 2024, 21:22:50) [GCC 12.2.0]",
"command_line_arguments": "/home/lucien/Workspace/deep-rl-hugging-face/.venv/bin/mlagents-learn ./Pyramids.yaml --env=./envs/Pyramids/Pyramids --run-id=Pyramids --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.5.1+cpu",
"numpy_version": "1.23.5",
"end_time_seconds": "1738882570"
},
"total": 329.23776459897636,
"count": 1,
"self": 0.32012054495862685,
"children": {
"run_training.setup": {
"total": 0.020413690013810992,
"count": 1,
"self": 0.020413690013810992
},
"TrainerController.start_learning": {
"total": 328.8972303640039,
"count": 1,
"self": 0.09587996121263131,
"children": {
"TrainerController._reset_env": {
"total": 2.221178787993267,
"count": 1,
"self": 2.221178787993267
},
"TrainerController.advance": {
"total": 326.40558701378177,
"count": 6318,
"self": 0.07535588680184446,
"children": {
"env_step": {
"total": 281.7097327912343,
"count": 6318,
"self": 274.31891261739656,
"children": {
"SubprocessEnvManager._take_step": {
"total": 7.331330438988516,
"count": 6318,
"self": 0.3121476978994906,
"children": {
"TorchPolicy.evaluate": {
"total": 7.0191827410890255,
"count": 6310,
"self": 7.0191827410890255
}
}
},
"workers": {
"total": 0.059489734849194065,
"count": 6318,
"self": 0.0,
"children": {
"worker_root": {
"total": 328.5259574865049,
"count": 6318,
"is_parallel": true,
"self": 63.05742806071066,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.002009084011660889,
"count": 1,
"is_parallel": true,
"self": 0.001066856988472864,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0009422270231880248,
"count": 8,
"is_parallel": true,
"self": 0.0009422270231880248
}
}
},
"UnityEnvironment.step": {
"total": 0.05068041500635445,
"count": 1,
"is_parallel": true,
"self": 0.00014139199629426003,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.000927670014789328,
"count": 1,
"is_parallel": true,
"self": 0.000927670014789328
},
"communicator.exchange": {
"total": 0.048852696985704824,
"count": 1,
"is_parallel": true,
"self": 0.048852696985704824
},
"steps_from_proto": {
"total": 0.0007586560095660388,
"count": 1,
"is_parallel": true,
"self": 0.00016557800699956715,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0005930780025664717,
"count": 8,
"is_parallel": true,
"self": 0.0005930780025664717
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 265.46852942579426,
"count": 6317,
"is_parallel": true,
"self": 1.0132971906277817,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 5.356598369020503,
"count": 6317,
"is_parallel": true,
"self": 5.356598369020503
},
"communicator.exchange": {
"total": 252.42190109810326,
"count": 6317,
"is_parallel": true,
"self": 252.42190109810326
},
"steps_from_proto": {
"total": 6.6767327680427115,
"count": 6317,
"is_parallel": true,
"self": 1.319251311419066,
"children": {
"_process_rank_one_or_two_observation": {
"total": 5.3574814566236455,
"count": 50536,
"is_parallel": true,
"self": 5.3574814566236455
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 44.62049833574565,
"count": 6318,
"self": 0.14018061952083372,
"children": {
"process_trajectory": {
"total": 6.464301612315467,
"count": 6318,
"self": 6.464301612315467
},
"_update_policy": {
"total": 38.01601610390935,
"count": 34,
"self": 19.734592373570194,
"children": {
"TorchPPOOptimizer.update": {
"total": 18.281423730339156,
"count": 4566,
"self": 18.281423730339156
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.110050551593304e-07,
"count": 1,
"self": 5.110050551593304e-07
},
"TrainerController._save_models": {
"total": 0.17458409001119435,
"count": 1,
"self": 0.0004309089854359627,
"children": {
"RLTrainer._checkpoint": {
"total": 0.17415318102575839,
"count": 1,
"self": 0.17415318102575839
}
}
}
}
}
}
}