Reinforcement Learning
ml-agents
TensorBoard
ONNX
SnowballTarget
deep-reinforcement-learning
ML-Agents-SnowballTarget
Instructions to use Revv8/ppo-SnowballTarget with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- ml-agents
How to use Revv8/ppo-SnowballTarget with ml-agents:
mlagents-load-from-hf --repo-id="Revv8/ppo-SnowballTarget" --local-dir="./downloads"
- Notebooks
- Google Colab
- Kaggle
| { | |
| "name": "root", | |
| "gauges": { | |
| "SnowballTarget.Policy.Entropy.mean": { | |
| "value": 0.8354975581169128, | |
| "min": 0.8294440507888794, | |
| "max": 2.5645792484283447, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.Entropy.sum": { | |
| "value": 8014.0927734375, | |
| "min": 8014.0927734375, | |
| "max": 23625.62109375, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Step.mean": { | |
| "value": 199984.0, | |
| "min": 19968.0, | |
| "max": 199984.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Step.sum": { | |
| "value": 199984.0, | |
| "min": 19968.0, | |
| "max": 199984.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.ExtrinsicValueEstimate.mean": { | |
| "value": 12.961666107177734, | |
| "min": 1.846458911895752, | |
| "max": 12.961666107177734, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.ExtrinsicValueEstimate.sum": { | |
| "value": 2657.1416015625, | |
| "min": 108.94107818603516, | |
| "max": 2657.1416015625, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Losses.PolicyLoss.mean": { | |
| "value": 0.06460846437143582, | |
| "min": 0.06046980756207132, | |
| "max": 0.07371379257578975, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Losses.PolicyLoss.sum": { | |
| "value": 0.32304232185717907, | |
| "min": 0.06465277752916639, | |
| "max": 0.3556739848974009, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Losses.ValueLoss.mean": { | |
| "value": 0.1857653231013055, | |
| "min": 0.1857653231013055, | |
| "max": 0.31603154726326466, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Losses.ValueLoss.sum": { | |
| "value": 0.9288266155065275, | |
| "min": 0.31603154726326466, | |
| "max": 1.4239543512755748, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.LearningRate.mean": { | |
| "value": 7.620097459999994e-06, | |
| "min": 7.620097459999994e-06, | |
| "max": 0.00027162000946, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.LearningRate.sum": { | |
| "value": 3.810048729999997e-05, | |
| "min": 3.810048729999997e-05, | |
| "max": 0.0013086000637999998, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.Epsilon.mean": { | |
| "value": 0.10254, | |
| "min": 0.10254, | |
| "max": 0.19054000000000001, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.Epsilon.sum": { | |
| "value": 0.5127, | |
| "min": 0.19054000000000001, | |
| "max": 0.9362, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.Beta.mean": { | |
| "value": 0.0001367459999999999, | |
| "min": 0.0001367459999999999, | |
| "max": 0.004527945999999999, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.Beta.sum": { | |
| "value": 0.0006837299999999995, | |
| "min": 0.0006837299999999995, | |
| "max": 0.02181638, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Environment.EpisodeLength.mean": { | |
| "value": 199.0, | |
| "min": 199.0, | |
| "max": 199.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Environment.EpisodeLength.sum": { | |
| "value": 10945.0, | |
| "min": 2189.0, | |
| "max": 10945.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Environment.CumulativeReward.mean": { | |
| "value": 25.78181818181818, | |
| "min": 9.181818181818182, | |
| "max": 25.78181818181818, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Environment.CumulativeReward.sum": { | |
| "value": 1418.0, | |
| "min": 101.0, | |
| "max": 1418.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.ExtrinsicReward.mean": { | |
| "value": 25.78181818181818, | |
| "min": 9.181818181818182, | |
| "max": 25.78181818181818, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.Policy.ExtrinsicReward.sum": { | |
| "value": 1418.0, | |
| "min": 101.0, | |
| "max": 1418.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.IsTraining.mean": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 19 | |
| }, | |
| "SnowballTarget.IsTraining.sum": { | |
| "value": 1.0, | |
| "min": 1.0, | |
| "max": 1.0, | |
| "count": 19 | |
| } | |
| }, | |
| "metadata": { | |
| "timer_format_version": "0.1.0", | |
| "start_time_seconds": "1777295364", | |
| "python_version": "3.10.12 (main, Mar 3 2026, 11:56:32) [GCC 11.4.0]", | |
| "command_line_arguments": "/usr/local/bin/mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=SnowballTarget1 --no-graphics --resume", | |
| "mlagents_version": "1.2.0.dev0", | |
| "mlagents_envs_version": "1.2.0.dev0", | |
| "communication_protocol_version": "1.5.0", | |
| "pytorch_version": "2.8.0+cu128", | |
| "numpy_version": "1.23.5", | |
| "end_time_seconds": "1777295925" | |
| }, | |
| "total": 561.2359936849998, | |
| "count": 1, | |
| "self": 0.9513471199988999, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.027780918000644306, | |
| "count": 1, | |
| "self": 0.027780918000644306 | |
| }, | |
| "TrainerController.start_learning": { | |
| "total": 560.2568656470003, | |
| "count": 1, | |
| "self": 0.587524446000316, | |
| "children": { | |
| "TrainerController._reset_env": { | |
| "total": 2.104993065000599, | |
| "count": 1, | |
| "self": 2.104993065000599 | |
| }, | |
| "TrainerController.advance": { | |
| "total": 557.4497666309999, | |
| "count": 16664, | |
| "self": 0.6030975679950643, | |
| "children": { | |
| "env_step": { | |
| "total": 396.9355102461268, | |
| "count": 16664, | |
| "self": 341.8756260271575, | |
| "children": { | |
| "SubprocessEnvManager._take_step": { | |
| "total": 54.66314560394039, | |
| "count": 16664, | |
| "self": 2.051443569042931, | |
| "children": { | |
| "TorchPolicy.evaluate": { | |
| "total": 52.61170203489746, | |
| "count": 16664, | |
| "self": 52.61170203489746 | |
| } | |
| } | |
| }, | |
| "workers": { | |
| "total": 0.3967386150288803, | |
| "count": 16664, | |
| "self": 0.0, | |
| "children": { | |
| "worker_root": { | |
| "total": 557.1841120379731, | |
| "count": 16664, | |
| "is_parallel": true, | |
| "self": 262.0590502140103, | |
| "children": { | |
| "run_training.setup": { | |
| "total": 0.0, | |
| "count": 0, | |
| "is_parallel": true, | |
| "self": 0.0, | |
| "children": { | |
| "steps_from_proto": { | |
| "total": 0.002366155000345316, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0006639780012847041, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.0017021769990606117, | |
| "count": 10, | |
| "is_parallel": true, | |
| "self": 0.0017021769990606117 | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 0.054092532000140636, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0007476280006812885, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 0.0004832499998883577, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0004832499998883577 | |
| }, | |
| "communicator.exchange": { | |
| "total": 0.04422045100000105, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.04422045100000105 | |
| }, | |
| "steps_from_proto": { | |
| "total": 0.008641202999569941, | |
| "count": 1, | |
| "is_parallel": true, | |
| "self": 0.0005580739980359795, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 0.008083129001533962, | |
| "count": 10, | |
| "is_parallel": true, | |
| "self": 0.008083129001533962 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "UnityEnvironment.step": { | |
| "total": 295.1250618239628, | |
| "count": 16663, | |
| "is_parallel": true, | |
| "self": 13.319300560050578, | |
| "children": { | |
| "UnityEnvironment._generate_step_input": { | |
| "total": 7.430884576010612, | |
| "count": 16663, | |
| "is_parallel": true, | |
| "self": 7.430884576010612 | |
| }, | |
| "communicator.exchange": { | |
| "total": 226.61178362196824, | |
| "count": 16663, | |
| "is_parallel": true, | |
| "self": 226.61178362196824 | |
| }, | |
| "steps_from_proto": { | |
| "total": 47.76309306593339, | |
| "count": 16663, | |
| "is_parallel": true, | |
| "self": 8.691494328834779, | |
| "children": { | |
| "_process_rank_one_or_two_observation": { | |
| "total": 39.07159873709861, | |
| "count": 166630, | |
| "is_parallel": true, | |
| "self": 39.07159873709861 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_advance": { | |
| "total": 159.91115881687801, | |
| "count": 16664, | |
| "self": 0.7658030419661372, | |
| "children": { | |
| "process_trajectory": { | |
| "total": 29.966465980919565, | |
| "count": 16664, | |
| "self": 29.507753163919006, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.4587128170005599, | |
| "count": 4, | |
| "self": 0.4587128170005599 | |
| } | |
| } | |
| }, | |
| "_update_policy": { | |
| "total": 129.1788897939923, | |
| "count": 83, | |
| "self": 47.87249548197633, | |
| "children": { | |
| "TorchPPOOptimizer.update": { | |
| "total": 81.30639431201598, | |
| "count": 4230, | |
| "self": 81.30639431201598 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| }, | |
| "trainer_threads": { | |
| "total": 1.4469997040578164e-06, | |
| "count": 1, | |
| "self": 1.4469997040578164e-06 | |
| }, | |
| "TrainerController._save_models": { | |
| "total": 0.114580057999774, | |
| "count": 1, | |
| "self": 0.0018646319995241356, | |
| "children": { | |
| "RLTrainer._checkpoint": { | |
| "total": 0.11271542600024986, | |
| "count": 1, | |
| "self": 0.11271542600024986 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } |