{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.1728253960609436,
"min": 0.16308020055294037,
"max": 1.4763644933700562,
"count": 100
},
"Pyramids.Policy.Entropy.sum": {
"value": 5223.474609375,
"min": 4918.4990234375,
"max": 44786.9921875,
"count": 100
},
"Pyramids.Step.mean": {
"value": 2999965.0,
"min": 29989.0,
"max": 2999965.0,
"count": 100
},
"Pyramids.Step.sum": {
"value": 2999965.0,
"min": 29989.0,
"max": 2999965.0,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.816712498664856,
"min": -0.203117236495018,
"max": 0.8835760354995728,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 243.3803253173828,
"min": -48.34190368652344,
"max": 274.7921447753906,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.019102422520518303,
"min": -0.01084805652499199,
"max": 0.29096683859825134,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 5.692522048950195,
"min": -3.2218728065490723,
"max": 69.25010681152344,
"count": 100
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06926028755454658,
"min": 0.06432453564013399,
"max": 0.07469949284374403,
"count": 100
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.969644025763652,
"min": 0.5052627934525645,
"max": 1.0601396428400596,
"count": 100
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.013062040997153548,
"min": 0.00012047082789129815,
"max": 0.01612036287102552,
"count": 100
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.18286857396014966,
"min": 0.00151611503153783,
"max": 0.23745259088658105,
"count": 100
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.5193066364547668e-06,
"min": 1.5193066364547668e-06,
"max": 0.0002984122719578143,
"count": 100
},
"Pyramids.Policy.LearningRate.sum": {
"value": 2.1270292910366736e-05,
"min": 2.1270292910366736e-05,
"max": 0.0037248839583720332,
"count": 100
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.1005064023809524,
"min": 0.1005064023809524,
"max": 0.19947075714285717,
"count": 100
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4070896333333336,
"min": 1.3962953000000002,
"max": 2.677506533333333,
"count": 100
},
"Pyramids.Policy.Beta.mean": {
"value": 6.058959785714302e-05,
"min": 6.058959785714302e-05,
"max": 0.009947128638571428,
"count": 100
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0008482543700000023,
"min": 0.0008482543700000023,
"max": 0.12416863387000002,
"count": 100
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.007786216679960489,
"min": 0.007055687252432108,
"max": 0.49347636103630066,
"count": 100
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.10900703072547913,
"min": 0.1036483570933342,
"max": 3.4543344974517822,
"count": 100
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 232.61029411764707,
"min": 204.62142857142857,
"max": 999.0,
"count": 100
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 31635.0,
"min": 16532.0,
"max": 33809.0,
"count": 100
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7232602783752715,
"min": -0.9998625526204705,
"max": 1.7953309267759323,
"count": 100
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 234.36339785903692,
"min": -31.995601683855057,
"max": 249.5509988218546,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7232602783752715,
"min": -0.9998625526204705,
"max": 1.7953309267759323,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 234.36339785903692,
"min": -31.995601683855057,
"max": 249.5509988218546,
"count": 100
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.018693804626924677,
"min": 0.015408162045394734,
"max": 9.614438253290514,
"count": 100
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.542357429261756,
"min": 2.141734524309868,
"max": 163.44545030593872,
"count": 100
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1768550416",
"python_version": "3.10.19 (main, Oct 21 2025, 16:43:05) [GCC 11.2.0]",
"command_line_arguments": "/home/ninghang/anaconda3/envs/qxh_huggy_env/bin/mlagents-learn ./ml-agents/config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.8.0+cu128",
"numpy_version": "1.23.5",
"end_time_seconds": "1768558905"
},
"total": 8489.217343541328,
"count": 1,
"self": 0.5256404294632375,
"children": {
"run_training.setup": {
"total": 0.042277988977730274,
"count": 1,
"self": 0.042277988977730274
},
"TrainerController.start_learning": {
"total": 8488.649425122887,
"count": 1,
"self": 2.8993346840143204,
"children": {
"TrainerController._reset_env": {
"total": 5.75547471921891,
"count": 1,
"self": 5.75547471921891
},
"TrainerController.advance": {
"total": 8479.920919189695,
"count": 194912,
"self": 3.004710135050118,
"children": {
"env_step": {
"total": 6286.280644763727,
"count": 194912,
"self": 5900.9771484970115,
"children": {
"SubprocessEnvManager._take_step": {
"total": 383.55817039869726,
"count": 194912,
"self": 9.999258208088577,
"children": {
"TorchPolicy.evaluate": {
"total": 373.5589121906087,
"count": 187565,
"self": 373.5589121906087
}
}
},
"workers": {
"total": 1.7453258680179715,
"count": 194912,
"self": 0.0,
"children": {
"worker_root": {
"total": 8478.26584891975,
"count": 194912,
"is_parallel": true,
"self": 2915.2074072700925,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0034880409948527813,
"count": 1,
"is_parallel": true,
"self": 0.0010593901388347149,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0024286508560180664,
"count": 8,
"is_parallel": true,
"self": 0.0024286508560180664
}
}
},
"UnityEnvironment.step": {
"total": 0.040600516833364964,
"count": 1,
"is_parallel": true,
"self": 0.0008473135530948639,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0007738666608929634,
"count": 1,
"is_parallel": true,
"self": 0.0007738666608929634
},
"communicator.exchange": {
"total": 0.03654577676206827,
"count": 1,
"is_parallel": true,
"self": 0.03654577676206827
},
"steps_from_proto": {
"total": 0.0024335598573088646,
"count": 1,
"is_parallel": true,
"self": 0.0005511930212378502,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0018823668360710144,
"count": 8,
"is_parallel": true,
"self": 0.0018823668360710144
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 5563.058441649657,
"count": 194911,
"is_parallel": true,
"self": 149.22693460341543,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 103.57292680395767,
"count": 194911,
"is_parallel": true,
"self": 103.57292680395767
},
"communicator.exchange": {
"total": 4892.346209367737,
"count": 194911,
"is_parallel": true,
"self": 4892.346209367737
},
"steps_from_proto": {
"total": 417.91237087454647,
"count": 194911,
"is_parallel": true,
"self": 86.86643951525912,
"children": {
"_process_rank_one_or_two_observation": {
"total": 331.04593135928735,
"count": 1559288,
"is_parallel": true,
"self": 331.04593135928735
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 2190.6355642909184,
"count": 194912,
"self": 6.148625488393009,
"children": {
"process_trajectory": {
"total": 330.28213377436623,
"count": 194912,
"self": 329.8232858059928,
"children": {
"RLTrainer._checkpoint": {
"total": 0.45884796837344766,
"count": 6,
"self": 0.45884796837344766
}
}
},
"_update_policy": {
"total": 1854.2048050281592,
"count": 1390,
"self": 877.7893710983917,
"children": {
"TorchPPOOptimizer.update": {
"total": 976.4154339297675,
"count": 68439,
"self": 976.4154339297675
}
}
}
}
}
}
},
"trainer_threads": {
"total": 9.690411388874054e-07,
"count": 1,
"self": 9.690411388874054e-07
},
"TrainerController._save_models": {
"total": 0.07369556091725826,
"count": 1,
"self": 0.0011517140083014965,
"children": {
"RLTrainer._checkpoint": {
"total": 0.07254384690895677,
"count": 1,
"self": 0.07254384690895677
}
}
}
}
}
}
}