{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.8562152981758118,
"min": 0.807285487651825,
"max": 1.463423728942871,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 25590.5625,
"min": 24218.564453125,
"max": 44394.421875,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989903.0,
"min": 29991.0,
"max": 989903.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989903.0,
"min": 29991.0,
"max": 989903.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.20928248763084412,
"min": -0.09928569942712784,
"max": 0.21552881598472595,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 53.367034912109375,
"min": -23.828567504882812,
"max": 55.390907287597656,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.007142363116145134,
"min": 0.00501882703974843,
"max": 0.2608388364315033,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 1.8213026523590088,
"min": 1.2446690797805786,
"max": 62.07964324951172,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06930253798103271,
"min": 0.06433660516061125,
"max": 0.07383881720283073,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 1.0395380697154906,
"min": 0.5080633790358692,
"max": 1.0400704665328648,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.010259400517679752,
"min": 5.928604987633723e-05,
"max": 0.010643899758344625,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.15389100776519626,
"min": 0.0008300046982687212,
"max": 0.15389100776519626,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.495397501566668e-06,
"min": 7.495397501566668e-06,
"max": 0.0002952367301591857,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00011243096252350002,
"min": 0.00011243096252350002,
"max": 0.0036342757885748003,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10249843333333335,
"min": 0.10249843333333335,
"max": 0.19841224285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.5374765000000001,
"min": 1.3888857,
"max": 2.6114252000000002,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002595934900000001,
"min": 0.0002595934900000001,
"max": 0.00984138306142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0038939023500000015,
"min": 0.0038939023500000015,
"max": 0.12116137747999998,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.009729793295264244,
"min": 0.009711523540318012,
"max": 0.3353482186794281,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.1459469050168991,
"min": 0.13596132397651672,
"max": 2.347437620162964,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 646.6666666666666,
"min": 571.9814814814815,
"max": 998.1153846153846,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29100.0,
"min": 16534.0,
"max": 32936.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 0.8643288460042742,
"min": -0.9246323072621899,
"max": 0.909340711241519,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 38.89479807019234,
"min": -30.511401653289795,
"max": 49.10439840704203,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 0.8643288460042742,
"min": -0.9246323072621899,
"max": 0.909340711241519,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 38.89479807019234,
"min": -30.511401653289795,
"max": 49.10439840704203,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.06598277280572802,
"min": 0.059064141707494855,
"max": 6.974162520292928,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.969224776257761,
"min": 2.969224776257761,
"max": 118.56076284497976,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1701163393",
"python_version": "3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.1.0.dev0",
"mlagents_envs_version": "1.1.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.1.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1701165615"
},
"total": 2221.931403825,
"count": 1,
"self": 0.4837461689999145,
"children": {
"run_training.setup": {
"total": 0.08162310700026865,
"count": 1,
"self": 0.08162310700026865
},
"TrainerController.start_learning": {
"total": 2221.366034549,
"count": 1,
"self": 1.6643334320419854,
"children": {
"TrainerController._reset_env": {
"total": 3.374886512000103,
"count": 1,
"self": 3.374886512000103
},
"TrainerController.advance": {
"total": 2216.246445965957,
"count": 63319,
"self": 1.6124199649789261,
"children": {
"env_step": {
"total": 1543.8654345180425,
"count": 63319,
"self": 1402.9127204731135,
"children": {
"SubprocessEnvManager._take_step": {
"total": 139.9755707390491,
"count": 63319,
"self": 4.980998232121237,
"children": {
"TorchPolicy.evaluate": {
"total": 134.99457250692785,
"count": 62553,
"self": 134.99457250692785
}
}
},
"workers": {
"total": 0.977143305879963,
"count": 63319,
"self": 0.0,
"children": {
"worker_root": {
"total": 2216.3698159679,
"count": 63319,
"is_parallel": true,
"self": 940.8888508627424,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0018476629998076533,
"count": 1,
"is_parallel": true,
"self": 0.0005879809987163753,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001259682001091278,
"count": 8,
"is_parallel": true,
"self": 0.001259682001091278
}
}
},
"UnityEnvironment.step": {
"total": 0.048652826999841636,
"count": 1,
"is_parallel": true,
"self": 0.0006000319990562275,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00044757300020137336,
"count": 1,
"is_parallel": true,
"self": 0.00044757300020137336
},
"communicator.exchange": {
"total": 0.04579700100021,
"count": 1,
"is_parallel": true,
"self": 0.04579700100021
},
"steps_from_proto": {
"total": 0.0018082210003740329,
"count": 1,
"is_parallel": true,
"self": 0.00043900500031668344,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013692160000573494,
"count": 8,
"is_parallel": true,
"self": 0.0013692160000573494
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1275.4809651051578,
"count": 63318,
"is_parallel": true,
"self": 35.91644695241621,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 24.771163492896903,
"count": 63318,
"is_parallel": true,
"self": 24.771163492896903
},
"communicator.exchange": {
"total": 1113.791195825866,
"count": 63318,
"is_parallel": true,
"self": 1113.791195825866
},
"steps_from_proto": {
"total": 101.00215883397868,
"count": 63318,
"is_parallel": true,
"self": 20.652890375863535,
"children": {
"_process_rank_one_or_two_observation": {
"total": 80.34926845811515,
"count": 506544,
"is_parallel": true,
"self": 80.34926845811515
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 670.7685914829353,
"count": 63319,
"self": 3.101938601018901,
"children": {
"process_trajectory": {
"total": 130.65349121591726,
"count": 63319,
"self": 130.4793840809175,
"children": {
"RLTrainer._checkpoint": {
"total": 0.17410713499975827,
"count": 2,
"self": 0.17410713499975827
}
}
},
"_update_policy": {
"total": 537.0131616659992,
"count": 454,
"self": 323.6747415859836,
"children": {
"TorchPPOOptimizer.update": {
"total": 213.33842008001557,
"count": 22767,
"self": 213.33842008001557
}
}
}
}
}
}
},
"trainer_threads": {
"total": 8.750002962187864e-07,
"count": 1,
"self": 8.750002962187864e-07
},
"TrainerController._save_models": {
"total": 0.08036776400058443,
"count": 1,
"self": 0.0014577540005120682,
"children": {
"RLTrainer._checkpoint": {
"total": 0.07891001000007236,
"count": 1,
"self": 0.07891001000007236
}
}
}
}
}
}
}