{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.3409586250782013,
"min": 0.3409586250782013,
"max": 1.4631893634796143,
"count": 33
},
"Pyramids.Policy.Entropy.sum": {
"value": 10256.03515625,
"min": 10256.03515625,
"max": 44387.3125,
"count": 33
},
"Pyramids.Step.mean": {
"value": 989901.0,
"min": 29952.0,
"max": 989901.0,
"count": 33
},
"Pyramids.Step.sum": {
"value": 989901.0,
"min": 29952.0,
"max": 989901.0,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.5369741916656494,
"min": -0.06532968580722809,
"max": 0.5705124735832214,
"count": 33
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 148.74185180664062,
"min": -15.744453430175781,
"max": 158.03195190429688,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.023083945736289024,
"min": -0.005652234889566898,
"max": 0.4116525948047638,
"count": 33
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 6.394252777099609,
"min": -1.543060064315796,
"max": 97.5616683959961,
"count": 33
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06636195156135641,
"min": 0.06449592549428677,
"max": 0.0744791227263502,
"count": 33
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9290673218589898,
"min": 0.49976830871310085,
"max": 1.117186840895253,
"count": 33
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01621793980718524,
"min": 0.0016796464605892244,
"max": 0.01621793980718524,
"count": 33
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.22705115730059336,
"min": 0.02183540398765992,
"max": 0.22705115730059336,
"count": 33
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.39469039227857e-06,
"min": 7.39469039227857e-06,
"max": 0.00029515063018788575,
"count": 33
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00010352566549189999,
"min": 0.00010352566549189999,
"max": 0.0036334951888349998,
"count": 33
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1024648642857143,
"min": 0.1024648642857143,
"max": 0.19838354285714285,
"count": 33
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4345081000000002,
"min": 1.3886848,
"max": 2.6175969000000006,
"count": 33
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0002562399421428572,
"min": 0.0002562399421428572,
"max": 0.00983851593142857,
"count": 33
},
"Pyramids.Policy.Beta.sum": {
"value": 0.00358735919,
"min": 0.00358735919,
"max": 0.1211353835,
"count": 33
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.00997029896825552,
"min": 0.00997029896825552,
"max": 0.5179756879806519,
"count": 33
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.13958418369293213,
"min": 0.13958418369293213,
"max": 3.6258299350738525,
"count": 33
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 353.2696629213483,
"min": 303.9569892473118,
"max": 999.0,
"count": 33
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 31441.0,
"min": 15984.0,
"max": 33999.0,
"count": 33
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.5548288729455737,
"min": -1.0000000521540642,
"max": 1.6696292043067096,
"count": 33
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 139.93459856510162,
"min": -26.014801546931267,
"max": 153.73079891502857,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.5548288729455737,
"min": -1.0000000521540642,
"max": 1.6696292043067096,
"count": 33
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 139.93459856510162,
"min": -26.014801546931267,
"max": 153.73079891502857,
"count": 33
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.036091732072883236,
"min": 0.03435979606846588,
"max": 10.994432546198368,
"count": 33
},
"Pyramids.Policy.RndReward.sum": {
"value": 3.248255886559491,
"min": 2.9354883096093545,
"max": 175.9109207391739,
"count": 33
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 33
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1744228728",
"python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.6.0+cu124",
"numpy_version": "1.23.5",
"end_time_seconds": "1744230982"
},
"total": 2254.131230154,
"count": 1,
"self": 0.533131456000774,
"children": {
"run_training.setup": {
"total": 0.019263628999397042,
"count": 1,
"self": 0.019263628999397042
},
"TrainerController.start_learning": {
"total": 2253.578835069,
"count": 1,
"self": 1.5327855570430984,
"children": {
"TrainerController._reset_env": {
"total": 2.058838669000579,
"count": 1,
"self": 2.058838669000579
},
"TrainerController.advance": {
"total": 2249.897575932956,
"count": 64058,
"self": 1.6346402569261045,
"children": {
"env_step": {
"total": 1553.290111048931,
"count": 64058,
"self": 1389.2932903340825,
"children": {
"SubprocessEnvManager._take_step": {
"total": 163.12948952296392,
"count": 64058,
"self": 4.83628146088995,
"children": {
"TorchPolicy.evaluate": {
"total": 158.29320806207397,
"count": 62543,
"self": 158.29320806207397
}
}
},
"workers": {
"total": 0.8673311918846593,
"count": 64058,
"self": 0.0,
"children": {
"worker_root": {
"total": 2248.3525579610496,
"count": 64058,
"is_parallel": true,
"self": 977.721297275948,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.001972068000213767,
"count": 1,
"is_parallel": true,
"self": 0.0006478300001617754,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013242380000519915,
"count": 8,
"is_parallel": true,
"self": 0.0013242380000519915
}
}
},
"UnityEnvironment.step": {
"total": 0.05241440600002534,
"count": 1,
"is_parallel": true,
"self": 0.0005049679994044709,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.00044961300045542885,
"count": 1,
"is_parallel": true,
"self": 0.00044961300045542885
},
"communicator.exchange": {
"total": 0.04979357200045342,
"count": 1,
"is_parallel": true,
"self": 0.04979357200045342
},
"steps_from_proto": {
"total": 0.0016662529997120146,
"count": 1,
"is_parallel": true,
"self": 0.0003393039996808511,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0013269490000311635,
"count": 8,
"is_parallel": true,
"self": 0.0013269490000311635
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1270.6312606851016,
"count": 64057,
"is_parallel": true,
"self": 32.779730566894614,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 23.63181751812226,
"count": 64057,
"is_parallel": true,
"self": 23.63181751812226
},
"communicator.exchange": {
"total": 1115.5568344349313,
"count": 64057,
"is_parallel": true,
"self": 1115.5568344349313
},
"steps_from_proto": {
"total": 98.66287816515342,
"count": 64057,
"is_parallel": true,
"self": 20.22672552784934,
"children": {
"_process_rank_one_or_two_observation": {
"total": 78.43615263730408,
"count": 512456,
"is_parallel": true,
"self": 78.43615263730408
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 694.9728246270988,
"count": 64058,
"self": 2.904109307204635,
"children": {
"process_trajectory": {
"total": 133.54918917487612,
"count": 64058,
"self": 133.34446863587618,
"children": {
"RLTrainer._checkpoint": {
"total": 0.20472053899993625,
"count": 2,
"self": 0.20472053899993625
}
}
},
"_update_policy": {
"total": 558.5195261450181,
"count": 458,
"self": 305.9218912831284,
"children": {
"TorchPPOOptimizer.update": {
"total": 252.59763486188967,
"count": 22803,
"self": 252.59763486188967
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.0120002116309479e-06,
"count": 1,
"self": 1.0120002116309479e-06
},
"TrainerController._save_models": {
"total": 0.08963389800010191,
"count": 1,
"self": 0.0016863440005181474,
"children": {
"RLTrainer._checkpoint": {
"total": 0.08794755399958376,
"count": 1,
"self": 0.08794755399958376
}
}
}
}
}
}
}