First Push

e1ca46d verified about 2 years ago

18.8 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.15252816677093506,
	"min": 0.14906516671180725,
	"max": 1.414180874824524,
	"count": 100
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 4588.04736328125,
	"min": 4464.7998046875,
	"max": 42900.58984375,
	"count": 100
	},
	"Pyramids.Step.mean": {
	"value": 2999890.0,
	"min": 29952.0,
	"max": 2999890.0,
	"count": 100
	},
	"Pyramids.Step.sum": {
	"value": 2999890.0,
	"min": 29952.0,
	"max": 2999890.0,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.8182879686355591,
	"min": -0.1800912469625473,
	"max": 0.8639610409736633,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 252.85098266601562,
	"min": -42.68162536621094,
	"max": 264.73980712890625,
	"count": 100
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 0.01633444055914879,
	"min": -0.01727745309472084,
	"max": 0.46886947751045227,
	"count": 100
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 5.047342300415039,
	"min": -4.9931840896606445,
	"max": 111.12206268310547,
	"count": 100
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.06845543435645393,
	"min": 0.06536668095607585,
	"max": 0.07368367805766238,
	"count": 100
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 1.026831515346809,
	"min": 0.49879954672351967,
	"max": 1.092246385746608,
	"count": 100
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 0.016203644212024904,
	"min": 0.0006459115332719983,
	"max": 0.0173887597076257,
	"count": 100
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 0.24305466318037358,
	"min": 0.009042761465807975,
	"max": 0.2443290390291105,
	"count": 100
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 1.5214728262088886e-06,
	"min": 1.5214728262088886e-06,
	"max": 0.00029838354339596195,
	"count": 100
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 2.282209239313333e-05,
	"min": 2.282209239313333e-05,
	"max": 0.0039690933769689005,
	"count": 100
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10050712444444444,
	"min": 0.10050712444444444,
	"max": 0.19946118095238097,
	"count": 100
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.5076068666666667,
	"min": 1.3962282666666668,
	"max": 2.767539433333334,
	"count": 100
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 6.066173200000002e-05,
	"min": 6.066173200000002e-05,
	"max": 0.009946171977142856,
	"count": 100
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.0009099259800000002,
	"min": 0.0009099259800000002,
	"max": 0.13231080689000002,
	"count": 100
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.00619171280413866,
	"min": 0.006113977171480656,
	"max": 0.5689794421195984,
	"count": 100
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.09287568926811218,
	"min": 0.08559568226337433,
	"max": 3.982856035232544,
	"count": 100
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 211.73333333333332,
	"min": 210.13138686131387,
	"max": 999.0,
	"count": 100
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 31760.0,
	"min": 15984.0,
	"max": 33377.0,
	"count": 100
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.73392078820491,
	"min": -1.0000000521540642,
	"max": 1.7871792959755866,
	"count": 100
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 258.3541974425316,
	"min": -30.354801654815674,
	"max": 259.14099791646004,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.73392078820491,
	"min": -1.0000000521540642,
	"max": 1.7871792959755866,
	"count": 100
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 258.3541974425316,
	"min": -30.354801654815674,
	"max": 259.14099791646004,
	"count": 100
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.013677320852500681,
	"min": 0.013677320852500681,
	"max": 11.693298460915685,
	"count": 100
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 2.0379208070226014,
	"min": 1.8333403850265313,
	"max": 187.09277537465096,
	"count": 100
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 100
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 100
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1705560733",
	"python_version": "3.10.12 (main, Jan 18 2024, 15:14:31) [GCC 11.4.0]",
	"command_line_arguments": "/home/omar/.pyenv/versions/3.10.12/envs/rlhf/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
	"mlagents_version": "1.1.0.dev0",
	"mlagents_envs_version": "1.1.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "2.1.2+cu121",
	"numpy_version": "1.23.5",
	"end_time_seconds": "1705562784"
	},
	"total": 2051.1559370680006,
	"count": 1,
	"self": 0.16801876400131732,
	"children": {
	"run_training.setup": {
	"total": 0.010624038000059954,
	"count": 1,
	"self": 0.010624038000059954
	},
	"TrainerController.start_learning": {
	"total": 2050.9772942659993,
	"count": 1,
	"self": 1.6147928751834115,
	"children": {
	"TrainerController._reset_env": {
	"total": 0.6259975359998862,
	"count": 1,
	"self": 0.6259975359998862
	},
	"TrainerController.advance": {
	"total": 2048.6991612388165,
	"count": 195036,
	"self": 1.4775207274569766,
	"children": {
	"env_step": {
	"total": 1287.8277061731478,
	"count": 195036,
	"self": 1166.8335389201993,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 119.98410006001086,
	"count": 195036,
	"self": 5.1350317988071765,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 114.84906826120368,
	"count": 187567,
	"self": 114.84906826120368
	}
	}
	},
	"workers": {
	"total": 1.0100671929376404,
	"count": 195036,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 2048.7941494200104,
	"count": 195036,
	"is_parallel": true,
	"self": 995.055227156945,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.000967019000199798,
	"count": 1,
	"is_parallel": true,
	"self": 0.00026698600140662165,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0007000329987931764,
	"count": 8,
	"is_parallel": true,
	"self": 0.0007000329987931764
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.01505296899995301,
	"count": 1,
	"is_parallel": true,
	"self": 0.0001391280002280837,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00020458800008782418,
	"count": 1,
	"is_parallel": true,
	"self": 0.00020458800008782418
	},
	"communicator.exchange": {
	"total": 0.01417733899961604,
	"count": 1,
	"is_parallel": true,
	"self": 0.01417733899961604
	},
	"steps_from_proto": {
	"total": 0.0005319140000210609,
	"count": 1,
	"is_parallel": true,
	"self": 0.00015708800128777511,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0003748259987332858,
	"count": 8,
	"is_parallel": true,
	"self": 0.0003748259987332858
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1053.7389222630654,
	"count": 195035,
	"is_parallel": true,
	"self": 20.99056299007316,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 17.357348764071503,
	"count": 195035,
	"is_parallel": true,
	"self": 17.357348764071503
	},
	"communicator.exchange": {
	"total": 947.2814714219066,
	"count": 195035,
	"is_parallel": true,
	"self": 947.2814714219066
	},
	"steps_from_proto": {
	"total": 68.10953908701413,
	"count": 195035,
	"is_parallel": true,
	"self": 14.092228908545621,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 54.017310178468506,
	"count": 1560280,
	"is_parallel": true,
	"self": 54.017310178468506
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 759.3939343382117,
	"count": 195036,
	"self": 3.448474896033076,
	"children": {
	"process_trajectory": {
	"total": 163.58176201317292,
	"count": 195036,
	"self": 163.32704501017452,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.2547170029984045,
	"count": 6,
	"self": 0.2547170029984045
	}
	}
	},
	"_update_policy": {
	"total": 592.3636974290057,
	"count": 1398,
	"self": 391.9358900801535,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 200.42780734885218,
	"count": 68391,
	"self": 200.42780734885218
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 5.800002327305265e-07,
	"count": 1,
	"self": 5.800002327305265e-07
	},
	"TrainerController._save_models": {
	"total": 0.03734203599924513,
	"count": 1,
	"self": 0.0009203109993904945,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.03642172499985463,
	"count": 1,
	"self": 0.03642172499985463
	}
	}
	}
	}
	}
	}
	}