{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.8562152981758118,
"min": 0.807285487651825,
"max": 1.463423728942871,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 25590.5625,
"min": 24218.564453125,
"max": 44394.421875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989903.0,
"min": 29991.0,
"max": 989903.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989903.0,
"min": 29991.0,
"max": 989903.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.20928248763084412,
"min": -0.09928569942712784,
"max": 0.21552881598472595,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 53.367034912109375,
"min": -23.828567504882812,
"max": 55.390907287597656,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.007142363116145134,
"min": 0.00501882703974843,
"max": 0.2608388364315033,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 1.8213026523590088,
"min": 1.2446690797805786,
"max": 62.07964324951172,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06930253798103271,
"min": 0.06433660516061125,
"max": 0.07383881720283073,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0395380697154906,
"min": 0.5080633790358692,
"max": 1.0400704665328648,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.010259400517679752,
"min": 5.928604987633723e-05,
"max": 0.010643899758344625,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.15389100776519626,
"min": 0.0008300046982687212,
"max": 0.15389100776519626,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.495397501566668e-06,
"min": 7.495397501566668e-06,
"max": 0.0002952367301591857,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00011243096252350002,
"min": 0.00011243096252350002,
"max": 0.0036342757885748003,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10249843333333335,
"min": 0.10249843333333335,
"max": 0.19841224285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5374765000000001,
"min": 1.3888857,
"max": 2.6114252000000002,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002595934900000001,
"min": 0.0002595934900000001,
"max": 0.00984138306142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0038939023500000015,
"min": 0.0038939023500000015,
"max": 0.12116137747999998,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.009729793295264244,
"min": 0.009711523540318012,
"max": 0.3353482186794281,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.1459469050168991,
"min": 0.13596132397651672,
"max": 2.347437620162964,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 646.6666666666666,
"min": 571.9814814814815,
"max": 998.1153846153846,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29100.0,
"min": 16534.0,
"max": 32936.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 0.8643288460042742,
"min": -0.9246323072621899,
"max": 0.909340711241519,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 38.89479807019234,
"min": -30.511401653289795,
"max": 49.10439840704203,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 0.8643288460042742,
"min": -0.9246323072621899,
"max": 0.909340711241519,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 38.89479807019234,
"min": -30.511401653289795,
"max": 49.10439840704203,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.06598277280572802,
"min": 0.059064141707494855,
"max": 6.974162520292928,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.969224776257761,
"min": 2.969224776257761,
"max": 118.56076284497976,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1701163393",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1701165615"
},
"total": 2221.931403825,
"count": 1,
"self": 0.4837461689999145,
"children": {
"run_training.setup": {
"total": 0.08162310700026865,
"count": 1,
"self": 0.08162310700026865
},
"TrainerController.start_learning": {
"total": 2221.366034549,
"count": 1,
"self": 1.6643334320419854,
"children": {
"TrainerController._reset_env": {
"total": 3.374886512000103,
"count": 1,
"self": 3.374886512000103
},
"TrainerController.advance": {
"total": 2216.246445965957,
"count": 63319,
"self": 1.6124199649789261,
"children": {
"env_step": {
"total": 1543.8654345180425,
"count": 63319,
"self": 1402.9127204731135,
"children": {
"SubprocessEnvManager._take_step": {
"total": 139.9755707390491,
"count": 63319,
"self": 4.980998232121237,
"children": {
"TorchPolicy.evaluate": {
"total": 134.99457250692785,
"count": 62553,
"self": 134.99457250692785
}
}
},
"workers": {
"total": 0.977143305879963,
"count": 63319,
"self": 0.0,
"children": {
"worker_root": {
"total": 2216.3698159679,
"count": 63319,
"is_parallel": true,
"self": 940.8888508627424,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0018476629998076533,
"count": 1,
"is_parallel": true,
"self": 0.0005879809987163753,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001259682001091278,
"count": 8,
"is_parallel": true,
"self": 0.001259682001091278
}
}
},
"UnityEnvironment.step": {
"total": 0.048652826999841636,
"count": 1,
"is_parallel": true,
"self": 0.0006000319990562275,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00044757300020137336,
"count": 1,
"is_parallel": true,
"self": 0.00044757300020137336
},
"communicator.exchange": {
"total": 0.04579700100021,
"count": 1,
"is_parallel": true,
"self": 0.04579700100021
},
"steps_from_proto": {
"total": 0.0018082210003740329,
"count": 1,
"is_parallel": true,
"self": 0.00043900500031668344,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013692160000573494,
"count": 8,
"is_parallel": true,
"self": 0.0013692160000573494
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1275.4809651051578,
"count": 63318,
"is_parallel": true,
"self": 35.91644695241621,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 24.771163492896903,
"count": 63318,
"is_parallel": true,
"self": 24.771163492896903
},
"communicator.exchange": {
"total": 1113.791195825866,
"count": 63318,
"is_parallel": true,
"self": 1113.791195825866
},
"steps_from_proto": {
"total": 101.00215883397868,
"count": 63318,
"is_parallel": true,
"self": 20.652890375863535,
"children": {
"_process_rank_one_or_two_observation": {
"total": 80.34926845811515,
"count": 506544,
"is_parallel": true,
"self": 80.34926845811515
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 670.7685914829353,
"count": 63319,
"self": 3.101938601018901,
"children": {
"process_trajectory": {
"total": 130.65349121591726,
"count": 63319,
"self": 130.4793840809175,
"children": {
"RLTrainer._checkpoint": {
"total": 0.17410713499975827,
"count": 2,
"self": 0.17410713499975827
}
}
},
"_update_policy": {
"total": 537.0131616659992,
"count": 454,
"self": 323.6747415859836,
"children": {
"TorchPPOOptimizer.update": {
"total": 213.33842008001557,
"count": 22767,
"self": 213.33842008001557
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.750002962187864e-07,
"count": 1,
"self": 8.750002962187864e-07
},
"TrainerController._save_models": {
"total": 0.08036776400058443,
"count": 1,
"self": 0.0014577540005120682,
"children": {
"RLTrainer._checkpoint": {
"total": 0.07891001000007236,
"count": 1,
"self": 0.07891001000007236
}
}
}
}
}
}
}