{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.16866670548915863,
"min": 0.14298048615455627,
"max": 0.2017812728881836,
"count": 78
},
"Pyramids.Policy.Entropy.sum": {
"value": 1689.36572265625,
"min": 258.4290771484375,
"max": 2082.3828125,
"count": 78
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 207.390243902439,
"min": 90.0,
"max": 351.8484848484849,
"count": 78
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 8503.0,
"min": 90.0,
"max": 11822.0,
"count": 78
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 78
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 78
},
"Pyramids.Step.mean": {
"value": 3769914.0,
"min": 3009877.0,
"max": 3769914.0,
"count": 77
},
"Pyramids.Step.sum": {
"value": 3769914.0,
"min": 3009877.0,
"max": 3769914.0,
"count": 77
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.8160757422447205,
"min": 0.7272409200668335,
"max": 0.9346607327461243,
"count": 77
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 79.15934753417969,
"min": 69.97814178466797,
"max": 98.36951446533203,
"count": 77
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.01794920675456524,
"min": -0.008713774383068085,
"max": 0.01794920675456524,
"count": 77
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 1.7410731315612793,
"min": -0.8626636266708374,
"max": 1.7410731315612793,
"count": 77
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.743819502795615,
"min": 1.5269272526105244,
"max": 1.8294038342741819,
"count": 77
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 71.49659961462021,
"min": 50.38859933614731,
"max": 96.69099915027618,
"count": 77
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.743819502795615,
"min": 1.5269272526105244,
"max": 1.8294038342741819,
"count": 77
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 71.49659961462021,
"min": 50.38859933614731,
"max": 96.69099915027618,
"count": 77
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.014612927447249204,
"min": 0.012734388162453588,
"max": 0.024531549192033708,
"count": 77
},
"Pyramids.Policy.RndReward.sum": {
"value": 0.5991300253372174,
"min": 0.5594189549374278,
"max": 0.9172348479623906,
"count": 77
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06972807957596767,
"min": 0.060060778998983246,
"max": 0.07937608590388359,
"count": 77
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.34864039787983836,
"min": 0.24024311599593298,
"max": 0.38904910096122575,
"count": 77
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013218042910254251,
"min": 0.010444670397555455,
"max": 0.017961966310394928,
"count": 77
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.06609021455127126,
"min": 0.04566196344482402,
"max": 0.088228661920784,
"count": 77
},
"Pyramids.Policy.LearningRate.mean": {
"value": 7.4107467297536e-05,
"min": 7.4107467297536e-05,
"max": 0.00011968987510339498,
"count": 77
},
"Pyramids.Policy.LearningRate.sum": {
"value": 0.00037053733648767994,
"min": 0.00030124325958568,
"max": 0.0005955817814728399,
"count": 77
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.124702464,
"min": 0.124702464,
"max": 0.139896605,
"count": 77
},
"Pyramids.Policy.Epsilon.sum": {
"value": 0.62351232,
"min": 0.5004143200000001,
"max": 0.69852716,
"count": 77
},
"Pyramids.Policy.Beta.mean": {
"value": 0.0024777761536,
"min": 0.0024777761536,
"max": 0.0039956708395,
"count": 77
},
"Pyramids.Policy.Beta.sum": {
"value": 0.012388880768,
"min": 0.010071390568,
"max": 0.019882863283999998,
"count": 77
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.0069138286635279655,
"min": 0.006375132128596306,
"max": 0.00757004925981164,
"count": 77
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.0345691442489624,
"min": 0.025500528514385223,
"max": 0.03785024583339691,
"count": 77
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1733172175",
"python_version": "3.10.12 (main, Nov 6 2024, 20:22:13) [GCC 11.4.0]",
"command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=PyramidsYT-1 --no-graphics --resume",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.5.1+cu121",
"numpy_version": "1.23.5",
"end_time_seconds": "1733174509"
},
"total": 2333.7778345630004,
"count": 1,
"self": 0.5862430599991058,
"children": {
"run_training.setup": {
"total": 0.10864878300071723,
"count": 1,
"self": 0.10864878300071723
},
"TrainerController.start_learning": {
"total": 2333.0829427200006,
"count": 1,
"self": 1.2622656740368257,
"children": {
"TrainerController._reset_env": {
"total": 3.962575563000428,
"count": 1,
"self": 3.962575563000428
},
"TrainerController.advance": {
"total": 2327.591626912965,
"count": 51435,
"self": 1.3719331270976909,
"children": {
"env_step": {
"total": 1737.092902000738,
"count": 51435,
"self": 1599.4982726967537,
"children": {
"SubprocessEnvManager._take_step": {
"total": 136.81575405105832,
"count": 51435,
"self": 4.223448720156739,
"children": {
"TorchPolicy.evaluate": {
"total": 132.59230533090158,
"count": 48736,
"self": 132.59230533090158
}
}
},
"workers": {
"total": 0.7788752529259,
"count": 51435,
"self": 0.0,
"children": {
"worker_root": {
"total": 2328.087750315076,
"count": 51435,
"is_parallel": true,
"self": 836.0744946930481,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.003712228999575018,
"count": 1,
"is_parallel": true,
"self": 0.0010917809995589778,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.00262044800001604,
"count": 8,
"is_parallel": true,
"self": 0.00262044800001604
}
}
},
"UnityEnvironment.step": {
"total": 0.0692232609999337,
"count": 1,
"is_parallel": true,
"self": 0.0007942080001157592,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0006458899997596745,
"count": 1,
"is_parallel": true,
"self": 0.0006458899997596745
},
"communicator.exchange": {
"total": 0.0654603290004161,
"count": 1,
"is_parallel": true,
"self": 0.0654603290004161
},
"steps_from_proto": {
"total": 0.002322833999642171,
"count": 1,
"is_parallel": true,
"self": 0.00048281100043823244,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0018400229992039385,
"count": 8,
"is_parallel": true,
"self": 0.0018400229992039385
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 1492.0132556220278,
"count": 51434,
"is_parallel": true,
"self": 29.21466150784181,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 19.64229587713544,
"count": 51434,
"is_parallel": true,
"self": 19.64229587713544
},
"communicator.exchange": {
"total": 1357.231846618146,
"count": 51434,
"is_parallel": true,
"self": 1357.231846618146
},
"steps_from_proto": {
"total": 85.92445161890464,
"count": 51434,
"is_parallel": true,
"self": 18.307256533466898,
"children": {
"_process_rank_one_or_two_observation": {
"total": 67.61719508543774,
"count": 411472,
"is_parallel": true,
"self": 67.61719508543774
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 589.1267917851292,
"count": 51435,
"self": 2.549223707026613,
"children": {
"process_trajectory": {
"total": 120.63775422610252,
"count": 51435,
"self": 118.81704518509923,
"children": {
"RLTrainer._checkpoint": {
"total": 1.8207090410032833,
"count": 16,
"self": 1.8207090410032833
}
}
},
"_update_policy": {
"total": 465.93981385200004,
"count": 369,
"self": 259.033244580819,
"children": {
"TorchPPOOptimizer.update": {
"total": 206.90656927118107,
"count": 17727,
"self": 206.90656927118107
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.5009991329861805e-06,
"count": 1,
"self": 1.5009991329861805e-06
},
"TrainerController._save_models": {
"total": 0.26647306899940304,
"count": 1,
"self": 0.01010396699894045,
"children": {
"RLTrainer._checkpoint": {
"total": 0.2563691020004626,
"count": 1,
"self": 0.2563691020004626
}
}
}
}
}
}
}