Reinforcement Learning
ml-agents
TensorBoard
ONNX
Pyramids
deep-reinforcement-learning
ML-Agents-Pyramids
Instructions to use Mostanie33/ppo-Pyramids with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use Mostanie33/ppo-Pyramids with ml-agents:
mlagents-load-from-hf --repo-id="Mostanie33/ppo-Pyramids" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "Pyramids.Policy.Entropy.mean": { | |
| "value": 0.3414781987667084, | |
| "min": 0.3229568600654602, | |
| "max": 1.4768258333206177, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.Entropy.sum": { | |
| "value": 10249.8095703125, | |
| "min": 9683.5380859375, | |
| "max": 44800.98828125, | |
| "count": 35 | |
| }, | |
| "Pyramids.Step.mean": { | |
| "value": 1049999.0, | |
| "min": 29978.0, | |
| "max": 1049999.0, | |
| "count": 35 | |
| }, | |
| "Pyramids.Step.sum": { | |
| "value": 1049999.0, | |
| "min": 29978.0, | |
| "max": 1049999.0, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 0.6589221954345703, | |
| "min": -0.09065193682909012, | |
| "max": 0.7241567373275757, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 185.81605529785156, | |
| "min": -21.847116470336914, | |
| "max": 212.90208435058594, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.mean": { | |
| "value": 0.02746553160250187, | |
| "min": -0.0077649326995015144, | |
| "max": 0.38338378071784973, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.RndValueEstimate.sum": { | |
| "value": 7.745279788970947, | |
| "min": -2.1425106525421143, | |
| "max": 91.2453384399414, | |
| "count": 35 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.mean": { | |
| "value": 0.06801666334723831, | |
| "min": 0.06657217535811819, | |
| "max": 0.0731795493736556, | |
| "count": 35 | |
| }, | |
| "Pyramids.Losses.PolicyLoss.sum": { | |
| "value": 0.9522332868613363, | |
| "min": 0.4985936847977051, | |
| "max": 1.0672017290004685, | |
| "count": 35 | |
| }, | |
| "Pyramids.Losses.ValueLoss.mean": { | |
| "value": 0.014023290805407953, | |
| "min": 0.0010239651581319517, | |
| "max": 0.015022778502454804, | |
| "count": 35 | |
| }, | |
| "Pyramids.Losses.ValueLoss.sum": { | |
| "value": 0.19632607127571133, | |
| "min": 0.010594490329526259, | |
| "max": 0.21031889903436726, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.LearningRate.mean": { | |
| "value": 0.00019658020590184765, | |
| "min": 0.00019658020590184765, | |
| "max": 0.00029838354339596195, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.LearningRate.sum": { | |
| "value": 0.002752122882625867, | |
| "min": 0.0020886848037717336, | |
| "max": 0.003926618891127066, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.Epsilon.mean": { | |
| "value": 0.1655267238095238, | |
| "min": 0.1655267238095238, | |
| "max": 0.19946118095238097, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.Epsilon.sum": { | |
| "value": 2.3173741333333333, | |
| "min": 1.3962282666666668, | |
| "max": 2.7225449333333334, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.Beta.mean": { | |
| "value": 0.00655611970857143, | |
| "min": 0.00655611970857143, | |
| "max": 0.009946171977142856, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.Beta.sum": { | |
| "value": 0.09178567592000002, | |
| "min": 0.06962320384, | |
| "max": 0.13089640604, | |
| "count": 35 | |
| }, | |
| "Pyramids.Losses.RNDLoss.mean": { | |
| "value": 0.008994421921670437, | |
| "min": 0.008994421921670437, | |
| "max": 0.4189161956310272, | |
| "count": 35 | |
| }, | |
| "Pyramids.Losses.RNDLoss.sum": { | |
| "value": 0.12592190504074097, | |
| "min": 0.12592190504074097, | |
| "max": 2.932413339614868, | |
| "count": 35 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.mean": { | |
| "value": 312.7171717171717, | |
| "min": 260.0357142857143, | |
| "max": 999.0, | |
| "count": 35 | |
| }, | |
| "Pyramids.Environment.EpisodeLength.sum": { | |
| "value": 30959.0, | |
| "min": 16777.0, | |
| "max": 32316.0, | |
| "count": 35 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.mean": { | |
| "value": 1.6854897796803592, | |
| "min": -0.9999871489501768, | |
| "max": 1.722103554090219, | |
| "count": 35 | |
| }, | |
| "Pyramids.Environment.CumulativeReward.sum": { | |
| "value": 165.1779984086752, | |
| "min": -30.999601617455482, | |
| "max": 192.87559805810452, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.mean": { | |
| "value": 1.6854897796803592, | |
| "min": -0.9999871489501768, | |
| "max": 1.722103554090219, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.ExtrinsicReward.sum": { | |
| "value": 165.1779984086752, | |
| "min": -30.999601617455482, | |
| "max": 192.87559805810452, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.RndReward.mean": { | |
| "value": 0.02933361790201161, | |
| "min": 0.027352317558490376, | |
| "max": 7.996843342833659, | |
| "count": 35 | |
| }, | |
| "Pyramids.Policy.RndReward.sum": { | |
| "value": 2.874694554397138, | |
| "min": 2.7080981512117432, | |
| "max": 135.9463368281722, | |
| "count": 35 | |
| }, | |
| "Pyramids.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 35 | |
| }, | |
| "Pyramids.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 35 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1779148830", | |
| "python_version": "3.10.12 (main, Jul 5 2023, 18:54:27) [GCC 11.2.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=Pyramids Training --no-graphics --force", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1779151444" | |
| }, | |
| "total": 2614.020349077, | |
| "count": 1, | |
| "self": 0.6095254410001871, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.024923345999923185, | |
| "count": 1, | |
| "self": 0.024923345999923185 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 2613.38590029, | |
| "count": 1, | |
| "self": 1.5537437299790327, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 3.6679993389998344, | |
| "count": 1, | |
| "self": 3.6679993389998344 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 2608.1613898820206, | |
| "count": 67939, | |
| "self": 1.5957651680046183, | |
| "children": { | |
| "env_step": { | |
| "total": 1887.567435377964, | |
| "count": 67939, | |
| "self": 1718.6781491829333, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 167.9735204170479, | |
| "count": 67939, | |
| "self": 5.242191060067853, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 162.73132935698004, | |
| "count": 66444, | |
| "self": 162.73132935698004 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.9157657779828696, | |
| "count": 67938, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 2607.418744525857, | |
| "count": 67938, | |
| "is_parallel": true, | |
| "self": 1019.0072410697971, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.0036665120001089235, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0018249680001645174, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.001841543999944406, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.001841543999944406 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.04733825799985425, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005757909993917565, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0004312840001148288, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004312840001148288 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.04470358100024896, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.04470358100024896 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.0016276020000987046, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0003568700003597769, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0012707319997389277, | |
| "count": 8, | |
| "is_parallel": true, | |
| "self": 0.0012707319997389277 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 1588.41150345606, | |
| "count": 67937, | |
| "is_parallel": true, | |
| "self": 37.4890986830751, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 25.22809416189557, | |
| "count": 67937, | |
| "is_parallel": true, | |
| "self": 25.22809416189557 | |
| }, | |
| "communicator.exchange": { | |
| "total": 1407.0763680820078, | |
| "count": 67937, | |
| "is_parallel": true, | |
| "self": 1407.0763680820078 | |
| }, | |
| "steps_from_proto": { | |
| "total": 118.61794252908157, | |
| "count": 67937, | |
| "is_parallel": true, | |
| "self": 24.62432771082422, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 93.99361481825736, | |
| "count": 543496, | |
| "is_parallel": true, | |
| "self": 93.99361481825736 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 718.9981893360518, | |
| "count": 67938, | |
| "self": 3.0223892421531673, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 137.9773634598996, | |
| "count": 67938, | |
| "self": 137.73225137190002, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.24511208799958695, | |
| "count": 2, | |
| "self": 0.24511208799958695 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 577.9984366339991, | |
| "count": 479, | |
| "self": 320.15141443494895, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 257.8470221990501, | |
| "count": 24201, | |
| "self": 257.8470221990501 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.383000380883459e-06, | |
| "count": 1, | |
| "self": 1.383000380883459e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.0027659560000756755, | |
| "count": 1, | |
| "self": 2.328000027773669e-05, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.002742675999797939, | |
| "count": 1, | |
| "self": 0.002742675999797939 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |