{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.12988372147083282,
"min": 0.11910796165466309,
"max": 1.4225294589996338,
"count": 100
},
"Pyramids.Policy.Entropy.sum": {
"value": 3877.80859375,
"min": 3540.841552734375,
"max": 43153.85546875,
"count": 100
},
"Pyramids.Step.mean": {
"value": 2999974.0,
"min": 29993.0,
"max": 2999974.0,
"count": 100
},
"Pyramids.Step.sum": {
"value": 2999974.0,
"min": 29993.0,
"max": 2999974.0,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7211458086967468,
"min": -0.14819246530532837,
"max": 0.8378350734710693,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 216.34373474121094,
"min": -35.269805908203125,
"max": 252.1883544921875,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.006574070546776056,
"min": -0.03483591601252556,
"max": 0.7079663276672363,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 1.9722211360931396,
"min": -9.6843843460083,
"max": 168.49598693847656,
"count": 100
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06823559434546735,
"min": 0.0638692040650692,
"max": 0.0753234517394268,
"count": 100
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9552983208365428,
"min": 0.5272641621759876,
"max": 1.068676784595785,
"count": 100
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01651956533065199,
"min": 0.0007519519846588888,
"max": 0.01923547001890304,
"count": 100
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.23127391462912783,
"min": 0.008271471831247776,
"max": 0.2574878372525906,
"count": 100
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.5535709107476198e-06,
"min": 1.5535709107476198e-06,
"max": 0.0002984122148149762,
"count": 100
},
"Pyramids.Policy.LearningRate.sum": {
"value": 2.1749992750466676e-05,
"min": 2.1749992750466676e-05,
"max": 0.0038430747189751326,
"count": 100
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10051782380952382,
"min": 0.10051782380952382,
"max": 0.1994707380952381,
"count": 100
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4072495333333335,
"min": 1.3962951666666668,
"max": 2.707439133333333,
"count": 100
},
"Pyramids.Policy.Beta.mean": {
"value": 6.17305985714286e-05,
"min": 6.17305985714286e-05,
"max": 0.009947126735714283,
"count": 100
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0008642283800000003,
"min": 0.0008642283800000003,
"max": 0.12811438417999998,
"count": 100
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.006641863379627466,
"min": 0.006592436693608761,
"max": 0.6367548704147339,
"count": 100
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.0929860845208168,
"min": 0.0922941118478775,
"max": 4.457283973693848,
"count": 100
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 236.6031746031746,
"min": 209.84397163120568,
"max": 999.0,
"count": 100
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29812.0,
"min": 16664.0,
"max": 34958.0,
"count": 100
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7316444302125582,
"min": -0.9999750521965325,
"max": 1.7894999893648285,
"count": 100
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 218.18719820678234,
"min": -31.99920167028904,
"max": 250.52999851107597,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7316444302125582,
"min": -0.9999750521965325,
"max": 1.7894999893648285,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 218.18719820678234,
"min": -31.99920167028904,
"max": 250.52999851107597,
"count": 100
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.016364526117011714,
"min": 0.014721549074654052,
"max": 12.14285856222405,
"count": 100
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.061930290743476,
"min": 1.9454383597476408,
"max": 206.42859555780888,
"count": 100
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1673759358",
"python_version": "3.8.15 (default, Nov 24 2022, 15:19:38) \n[GCC 11.2.0]",
"command_line_arguments": "/home/dfm/anaconda3/envs/hf-drl-class-u5/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.20.0",
"end_time_seconds": "1673764473"
},
"total": 5115.282702160999,
"count": 1,
"self": 0.31992487609386444,
"children": {
"run_training.setup": {
"total": 0.011667896062135696,
"count": 1,
"self": 0.011667896062135696
},
"TrainerController.start_learning": {
"total": 5114.951109388843,
"count": 1,
"self": 2.545036083087325,
"children": {
"TrainerController._reset_env": {
"total": 2.994590302929282,
"count": 1,
"self": 2.994590302929282
},
"TrainerController.advance": {
"total": 5109.354462707415,
"count": 194530,
"self": 2.2876471001654863,
"children": {
"env_step": {
"total": 3386.0925094373524,
"count": 194530,
"self": 3194.7305759713054,
"children": {
"SubprocessEnvManager._take_step": {
"total": 189.7941021118313,
"count": 194530,
"self": 8.032503155991435,
"children": {
"TorchPolicy.evaluate": {
"total": 181.76159895583987,
"count": 187559,
"self": 62.05167786218226,
"children": {
"TorchPolicy.sample_actions": {
"total": 119.70992109365761,
"count": 187559,
"self": 119.70992109365761
}
}
}
}
},
"workers": {
"total": 1.5678313542157412,
"count": 194530,
"self": 0.0,
"children": {
"worker_root": {
"total": 5108.917170386761,
"count": 194530,
"is_parallel": true,
"self": 2154.2498431280255,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0030338186770677567,
"count": 1,
"is_parallel": true,
"self": 0.0008988324552774429,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0021349862217903137,
"count": 8,
"is_parallel": true,
"self": 0.0021349862217903137
}
}
},
"UnityEnvironment.step": {
"total": 0.04927573911845684,
"count": 1,
"is_parallel": true,
"self": 0.0006872918456792831,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005838330835103989,
"count": 1,
"is_parallel": true,
"self": 0.0005838330835103989
},
"communicator.exchange": {
"total": 0.0458248145878315,
"count": 1,
"is_parallel": true,
"self": 0.0458248145878315
},
"steps_from_proto": {
"total": 0.0021797996014356613,
"count": 1,
"is_parallel": true,
"self": 0.0005303584039211273,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001649441197514534,
"count": 8,
"is_parallel": true,
"self": 0.001649441197514534
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2954.667327258736,
"count": 194529,
"is_parallel": true,
"self": 69.76508016139269,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 48.31231448985636,
"count": 194529,
"is_parallel": true,
"self": 48.31231448985636
},
"communicator.exchange": {
"total": 2629.3853055555373,
"count": 194529,
"is_parallel": true,
"self": 2629.3853055555373
},
"steps_from_proto": {
"total": 207.2046270519495,
"count": 194529,
"is_parallel": true,
"self": 48.51141821220517,
"children": {
"_process_rank_one_or_two_observation": {
"total": 158.69320883974433,
"count": 1556232,
"is_parallel": true,
"self": 158.69320883974433
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1720.9743061698973,
"count": 194530,
"self": 5.460758892819285,
"children": {
"process_trajectory": {
"total": 282.3728479221463,
"count": 194530,
"self": 281.9072140324861,
"children": {
"RLTrainer._checkpoint": {
"total": 0.4656338896602392,
"count": 6,
"self": 0.4656338896602392
}
}
},
"_update_policy": {
"total": 1433.1406993549317,
"count": 1393,
"self": 392.48211096972227,
"children": {
"TorchPPOOptimizer.update": {
"total": 1040.6585883852094,
"count": 68328,
"self": 1040.6585883852094
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.159527063369751e-07,
"count": 1,
"self": 5.159527063369751e-07
},
"TrainerController._save_models": {
"total": 0.057019779458642006,
"count": 1,
"self": 0.0008876994252204895,
"children": {
"RLTrainer._checkpoint": {
"total": 0.056132080033421516,
"count": 1,
"self": 0.056132080033421516
}
}
}
}
}
}
}