Devyaansh123's picture
Initial commit of trained Pyramids agent
435c815 verified
{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.15044279396533966,
"min": 0.13375379145145416,
"max": 1.4381340742111206,
"count": 100
},
"Pyramids.Policy.Entropy.sum": {
"value": 4532.54052734375,
"min": 4063.975341796875,
"max": 43627.234375,
"count": 100
},
"Pyramids.Step.mean": {
"value": 2999898.0,
"min": 29952.0,
"max": 2999898.0,
"count": 100
},
"Pyramids.Step.sum": {
"value": 2999898.0,
"min": 29952.0,
"max": 2999898.0,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7566414475440979,
"min": -0.1753283441066742,
"max": 0.8517789840698242,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 227.7490692138672,
"min": -41.552818298339844,
"max": 253.83013916015625,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.002145596081390977,
"min": -0.007263427600264549,
"max": 0.31291162967681885,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 0.6458244323730469,
"min": -2.157238006591797,
"max": 74.1600570678711,
"count": 100
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06360184082709068,
"min": 0.06360184082709068,
"max": 0.07332028726592572,
"count": 100
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.8904257715792696,
"min": 0.47079630343570655,
"max": 1.0624053268887412,
"count": 100
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.017139451402454396,
"min": 0.000837558993595087,
"max": 0.017827829675211217,
"count": 100
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.23995231963436156,
"min": 0.011725825910331219,
"max": 0.24958961545295702,
"count": 100
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.459492370678568e-06,
"min": 1.459492370678568e-06,
"max": 0.00029838354339596195,
"count": 100
},
"Pyramids.Policy.LearningRate.sum": {
"value": 2.0432893189499952e-05,
"min": 2.0432893189499952e-05,
"max": 0.004011360562879832,
"count": 100
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1004864642857143,
"min": 0.1004864642857143,
"max": 0.19946118095238097,
"count": 100
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4068105000000002,
"min": 1.3962282666666668,
"max": 2.782468866666667,
"count": 100
},
"Pyramids.Policy.Beta.mean": {
"value": 5.8597782142857054e-05,
"min": 5.8597782142857054e-05,
"max": 0.009946171977142856,
"count": 100
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0008203689499999987,
"min": 0.0008203689499999987,
"max": 0.13371830465,
"count": 100
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.005334161687642336,
"min": 0.005208165850490332,
"max": 0.3993033766746521,
"count": 100
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.07467826455831528,
"min": 0.07291432470083237,
"max": 2.79512357711792,
"count": 100
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 234.98461538461538,
"min": 214.96268656716418,
"max": 999.0,
"count": 100
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 30548.0,
"min": 15984.0,
"max": 34306.0,
"count": 100
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.734077508481898,
"min": -1.0000000521540642,
"max": 1.7701074513259218,
"count": 100
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 223.69599859416485,
"min": -27.984401658177376,
"max": 237.19439847767353,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.734077508481898,
"min": -1.0000000521540642,
"max": 1.7701074513259218,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 223.69599859416485,
"min": -27.984401658177376,
"max": 237.19439847767353,
"count": 100
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.0128767684406859,
"min": 0.012593642679577762,
"max": 7.484875182621181,
"count": 100
},
"Pyramids.Policy.RndReward.sum": {
"value": 1.661103128848481,
"min": 1.5345477966766339,
"max": 119.7580029219389,
"count": 100
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1752340452",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.7.1+cu126",
"numpy_version": "1.23.5",
"end_time_seconds": "1752350971"
},
"total": 10519.730625836,
"count": 1,
"self": 0.7588883980006358,
"children": {
"run_training.setup": {
"total": 0.03093724299992573,
"count": 1,
"self": 0.03093724299992573
},
"TrainerController.start_learning": {
"total": 10518.940800195,
"count": 1,
"self": 6.9295339211967075,
"children": {
"TrainerController._reset_env": {
"total": 4.348806910999883,
"count": 1,
"self": 4.348806910999883
},
"TrainerController.advance": {
"total": 10507.566589916805,
"count": 194719,
"self": 7.871507933679823,
"children": {
"env_step": {
"total": 7379.331895034259,
"count": 194719,
"self": 6911.956747127414,
"children": {
"SubprocessEnvManager._take_step": {
"total": 463.32433933604216,
"count": 194719,
"self": 20.951257697458686,
"children": {
"TorchPolicy.evaluate": {
"total": 442.37308163858347,
"count": 187560,
"self": 442.37308163858347
}
}
},
"workers": {
"total": 4.050808570802701,
"count": 194719,
"self": 0.0,
"children": {
"worker_root": {
"total": 10490.406852881766,
"count": 194719,
"is_parallel": true,
"self": 4119.923671542867,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.007261976000108916,
"count": 1,
"is_parallel": true,
"self": 0.004448279999905935,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.002813696000202981,
"count": 8,
"is_parallel": true,
"self": 0.002813696000202981
}
}
},
"UnityEnvironment.step": {
"total": 0.06011857099997542,
"count": 1,
"is_parallel": true,
"self": 0.0006970870001623553,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00064267499988091,
"count": 1,
"is_parallel": true,
"self": 0.00064267499988091
},
"communicator.exchange": {
"total": 0.0566563639999913,
"count": 1,
"is_parallel": true,
"self": 0.0566563639999913
},
"steps_from_proto": {
"total": 0.0021224449999408534,
"count": 1,
"is_parallel": true,
"self": 0.0004974770004082529,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0016249679995326005,
"count": 8,
"is_parallel": true,
"self": 0.0016249679995326005
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 6370.483181338899,
"count": 194718,
"is_parallel": true,
"self": 142.92265311580832,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 92.92594240906715,
"count": 194718,
"is_parallel": true,
"self": 92.92594240906715
},
"communicator.exchange": {
"total": 5745.828472469098,
"count": 194718,
"is_parallel": true,
"self": 5745.828472469098
},
"steps_from_proto": {
"total": 388.8061133449255,
"count": 194718,
"is_parallel": true,
"self": 84.59424011886654,
"children": {
"_process_rank_one_or_two_observation": {
"total": 304.21187322605897,
"count": 1557744,
"is_parallel": true,
"self": 304.21187322605897
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 3120.363186948867,
"count": 194719,
"self": 13.49916869363733,
"children": {
"process_trajectory": {
"total": 486.79231573420634,
"count": 194719,
"self": 486.0928895862064,
"children": {
"RLTrainer._checkpoint": {
"total": 0.6994261479999295,
"count": 6,
"self": 0.6994261479999295
}
}
},
"_update_policy": {
"total": 2620.071702521024,
"count": 1407,
"self": 1052.8617581508422,
"children": {
"TorchPPOOptimizer.update": {
"total": 1567.2099443701816,
"count": 68400,
"self": 1567.2099443701816
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.155998688773252e-06,
"count": 1,
"self": 1.155998688773252e-06
},
"TrainerController._save_models": {
"total": 0.09586828999999852,
"count": 1,
"self": 0.00237369100068463,
"children": {
"RLTrainer._checkpoint": {
"total": 0.0934945989993139,
"count": 1,
"self": 0.0934945989993139
}
}
}
}
}
}
}