{
"name": "root",
"gauges": {
"Pyramids.Policy.Entropy.mean": {
"value": 0.12988372147083282,
"min": 0.11910796165466309,
"max": 1.4225294589996338,
"count": 100
},
"Pyramids.Policy.Entropy.sum": {
"value": 3877.80859375,
"min": 3540.841552734375,
"max": 43153.85546875,
"count": 100
},
"Pyramids.Step.mean": {
"value": 2999974.0,
"min": 29993.0,
"max": 2999974.0,
"count": 100
},
"Pyramids.Step.sum": {
"value": 2999974.0,
"min": 29993.0,
"max": 2999974.0,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.7211458086967468,
"min": -0.14819246530532837,
"max": 0.8378350734710693,
"count": 100
},
"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
"value": 216.34373474121094,
"min": -35.269805908203125,
"max": 252.1883544921875,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.mean": {
"value": 0.006574070546776056,
"min": -0.03483591601252556,
"max": 0.7079663276672363,
"count": 100
},
"Pyramids.Policy.RndValueEstimate.sum": {
"value": 1.9722211360931396,
"min": -9.6843843460083,
"max": 168.49598693847656,
"count": 100
},
"Pyramids.Losses.PolicyLoss.mean": {
"value": 0.06823559434546735,
"min": 0.0638692040650692,
"max": 0.0753234517394268,
"count": 100
},
"Pyramids.Losses.PolicyLoss.sum": {
"value": 0.9552983208365428,
"min": 0.5272641621759876,
"max": 1.068676784595785,
"count": 100
},
"Pyramids.Losses.ValueLoss.mean": {
"value": 0.01651956533065199,
"min": 0.0007519519846588888,
"max": 0.01923547001890304,
"count": 100
},
"Pyramids.Losses.ValueLoss.sum": {
"value": 0.23127391462912783,
"min": 0.008271471831247776,
"max": 0.2574878372525906,
"count": 100
},
"Pyramids.Policy.LearningRate.mean": {
"value": 1.5535709107476198e-06,
"min": 1.5535709107476198e-06,
"max": 0.0002984122148149762,
"count": 100
},
"Pyramids.Policy.LearningRate.sum": {
"value": 2.1749992750466676e-05,
"min": 2.1749992750466676e-05,
"max": 0.0038430747189751326,
"count": 100
},
"Pyramids.Policy.Epsilon.mean": {
"value": 0.10051782380952382,
"min": 0.10051782380952382,
"max": 0.1994707380952381,
"count": 100
},
"Pyramids.Policy.Epsilon.sum": {
"value": 1.4072495333333335,
"min": 1.3962951666666668,
"max": 2.707439133333333,
"count": 100
},
"Pyramids.Policy.Beta.mean": {
"value": 6.17305985714286e-05,
"min": 6.17305985714286e-05,
"max": 0.009947126735714283,
"count": 100
},
"Pyramids.Policy.Beta.sum": {
"value": 0.0008642283800000003,
"min": 0.0008642283800000003,
"max": 0.12811438417999998,
"count": 100
},
"Pyramids.Losses.RNDLoss.mean": {
"value": 0.006641863379627466,
"min": 0.006592436693608761,
"max": 0.6367548704147339,
"count": 100
},
"Pyramids.Losses.RNDLoss.sum": {
"value": 0.0929860845208168,
"min": 0.0922941118478775,
"max": 4.457283973693848,
"count": 100
},
"Pyramids.Environment.EpisodeLength.mean": {
"value": 236.6031746031746,
"min": 209.84397163120568,
"max": 999.0,
"count": 100
},
"Pyramids.Environment.EpisodeLength.sum": {
"value": 29812.0,
"min": 16664.0,
"max": 34958.0,
"count": 100
},
"Pyramids.Environment.CumulativeReward.mean": {
"value": 1.7316444302125582,
"min": -0.9999750521965325,
"max": 1.7894999893648285,
"count": 100
},
"Pyramids.Environment.CumulativeReward.sum": {
"value": 218.18719820678234,
"min": -31.99920167028904,
"max": 250.52999851107597,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.mean": {
"value": 1.7316444302125582,
"min": -0.9999750521965325,
"max": 1.7894999893648285,
"count": 100
},
"Pyramids.Policy.ExtrinsicReward.sum": {
"value": 218.18719820678234,
"min": -31.99920167028904,
"max": 250.52999851107597,
"count": 100
},
"Pyramids.Policy.RndReward.mean": {
"value": 0.016364526117011714,
"min": 0.014721549074654052,
"max": 12.14285856222405,
"count": 100
},
"Pyramids.Policy.RndReward.sum": {
"value": 2.061930290743476,
"min": 1.9454383597476408,
"max": 206.42859555780888,
"count": 100
},
"Pyramids.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"Pyramids.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1673759358",
"python_version": "3.8.15 (default, Nov 24 2022, 15:19:38) \n[GCC 11.2.0]",
"command_line_arguments": "/home/dfm/anaconda3/envs/hf-drl-class-u5/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics",
"mlagents_version": "0.29.0.dev0",
"mlagents_envs_version": "0.29.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.8.1+cu102",
"numpy_version": "1.20.0",
"end_time_seconds": "1673764473"
},
"total": 5115.282702160999,
"count": 1,
"self": 0.31992487609386444,
"children": {
"run_training.setup": {
"total": 0.011667896062135696,
"count": 1,
"self": 0.011667896062135696
},
"TrainerController.start_learning": {
"total": 5114.951109388843,
"count": 1,
"self": 2.545036083087325,
"children": {
"TrainerController._reset_env": {
"total": 2.994590302929282,
"count": 1,
"self": 2.994590302929282
},
"TrainerController.advance": {
"total": 5109.354462707415,
"count": 194530,
"self": 2.2876471001654863,
"children": {
"env_step": {
"total": 3386.0925094373524,
"count": 194530,
"self": 3194.7305759713054,
"children": {
"SubprocessEnvManager._take_step": {
"total": 189.7941021118313,
"count": 194530,
"self": 8.032503155991435,
"children": {
"TorchPolicy.evaluate": {
"total": 181.76159895583987,
"count": 187559,
"self": 62.05167786218226,
"children": {
"TorchPolicy.sample_actions": {
"total": 119.70992109365761,
"count": 187559,
"self": 119.70992109365761
}
}
}
}
},
"workers": {
"total": 1.5678313542157412,
"count": 194530,
"self": 0.0,
"children": {
"worker_root": {
"total": 5108.917170386761,
"count": 194530,
"is_parallel": true,
"self": 2154.2498431280255,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0030338186770677567,
"count": 1,
"is_parallel": true,
"self": 0.0008988324552774429,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0021349862217903137,
"count": 8,
"is_parallel": true,
"self": 0.0021349862217903137
}
}
},
"UnityEnvironment.step": {
"total": 0.04927573911845684,
"count": 1,
"is_parallel": true,
"self": 0.0006872918456792831,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0005838330835103989,
"count": 1,
"is_parallel": true,
"self": 0.0005838330835103989
},
"communicator.exchange": {
"total": 0.0458248145878315,
"count": 1,
"is_parallel": true,
"self": 0.0458248145878315
},
"steps_from_proto": {
"total": 0.0021797996014356613,
"count": 1,
"is_parallel": true,
"self": 0.0005303584039211273,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.001649441197514534,
"count": 8,
"is_parallel": true,
"self": 0.001649441197514534
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2954.667327258736,
"count": 194529,
"is_parallel": true,
"self": 69.76508016139269,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 48.31231448985636,
"count": 194529,
"is_parallel": true,
"self": 48.31231448985636
},
"communicator.exchange": {
"total": 2629.3853055555373,
"count": 194529,
"is_parallel": true,
"self": 2629.3853055555373
},
"steps_from_proto": {
"total": 207.2046270519495,
"count": 194529,
"is_parallel": true,
"self": 48.51141821220517,
"children": {
"_process_rank_one_or_two_observation": {
"total": 158.69320883974433,
"count": 1556232,
"is_parallel": true,
"self": 158.69320883974433
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 1720.9743061698973,
"count": 194530,
"self": 5.460758892819285,
"children": {
"process_trajectory": {
"total": 282.3728479221463,
"count": 194530,
"self": 281.9072140324861,
"children": {
"RLTrainer._checkpoint": {
"total": 0.4656338896602392,
"count": 6,
"self": 0.4656338896602392
}
}
},
"_update_policy": {
"total": 1433.1406993549317,
"count": 1393,
"self": 392.48211096972227,
"children": {
"TorchPPOOptimizer.update": {
"total": 1040.6585883852094,
"count": 68328,
"self": 1040.6585883852094
}
}
}
}
}
}
},
"trainer_threads": {
"total": 5.159527063369751e-07,
"count": 1,
"self": 5.159527063369751e-07
},
"TrainerController._save_models": {
"total": 0.057019779458642006,
"count": 1,
"self": 0.0008876994252204895,
"children": {
"RLTrainer._checkpoint": {
"total": 0.056132080033421516,
"count": 1,
"self": 0.056132080033421516
}
}
}
}
}
}
}