Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.summary/0/events.out.tfevents.1695295454.rhmmedcatt-ProLiant-ML350-Gen10 +3 -0
README.md +7 -7
checkpoint_p0/best_000016776_8589312_reward_4311.623.pth +3 -0
checkpoint_p0/checkpoint_000019256_9859072.pth +3 -0
checkpoint_p0/checkpoint_000019544_10006528.pth +3 -0
config.json +10 -10
git.diff +0 -0
replay.mp4 +2 -2
sf_log.txt +0 -0

.summary/0/events.out.tfevents.1695295454.rhmmedcatt-ProLiant-ML350-Gen10 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce04327a3be9212af0bdbd1f8c3ef57a5525dd927b3f79e115604757a389e1fc
+size 739984

README.md CHANGED Viewed

@@ -5,22 +5,22 @@ tags:
 - reinforcement-learning
 - sample-factory
 model-index:
-- name: APPO
   results:
   - task:
       type: reinforcement-learning
       name: reinforcement-learning
     dataset:
-      name: mujoco_swimmer
-      type: mujoco_swimmer
     metrics:
     - type: mean_reward
-      value: 39.18 +/- 2.88
       name: mean_reward
       verified: false
 ---
-A(n) **APPO** model trained on the **mujoco_swimmer** environment.
 This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
 Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
@@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r MattStammers/appo-mujoco-s
 To run the model after download, use the `enjoy` script corresponding to this environment:
 ```
-python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_swimmer --train_dir=./train_dir --experiment=appo-mujoco-swimmer
 ```
@@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
 To continue training with this model, use the `train` script corresponding to this environment:
 ```
-python -m sf_examples.mujoco.train_mujoco --algo=APPO --env=mujoco_swimmer --train_dir=./train_dir --experiment=appo-mujoco-swimmer --restart_behavior=resume --train_for_env_steps=10000000000
 ```
 Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.

 - reinforcement-learning
 - sample-factory
 model-index:
+- name: ATQC
   results:
   - task:
       type: reinforcement-learning
       name: reinforcement-learning
     dataset:
+      name: mujoco_walker
+      type: mujoco_walker
     metrics:
     - type: mean_reward
+      value: 3553.55 +/- 944.12
       name: mean_reward
       verified: false
 ---
+An **ATQC** model trained on the **mujoco_walker** environment.
 This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
 Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
 To run the model after download, use the `enjoy` script corresponding to this environment:
 ```
+python -m sf_examples.mujoco.enjoy_mujoco --algo=ATQC --env=mujoco_walker --train_dir=./train_dir --experiment=appo-mujoco-swimmer
 ```
 To continue training with this model, use the `train` script corresponding to this environment:
 ```
+python -m sf_examples.mujoco.train_mujoco --algo=ATQC --env=mujoco_walker --train_dir=./train_dir --experiment=appo-mujoco-swimmer --restart_behavior=resume --train_for_env_steps=10000000000
 ```
 Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously concluded.

checkpoint_p0/best_000016776_8589312_reward_4311.623.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:986afd2328fe819d65d8aff66a9fd77c1d5ea1186758b05edb5cf0c761f05c16
+size 83071

checkpoint_p0/checkpoint_000019256_9859072.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:939d75e1b639ef1ac4777d43d2f9a2429ca7789a03ce30faa06b046aad0bce75
+size 83335

checkpoint_p0/checkpoint_000019544_10006528.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1cd50816ac4986e76951727309652d951f620c5555253c82a889f5ac45fdc4b0
+size 83335

config.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "help": false,
   "algo": "APPO",
-  "env": "mujoco_swimmer",
-  "experiment": "Swimmer",
   "train_dir": "./train_dir",
-  "restart_behavior": "resume",
   "device": "gpu",
   "seed": null,
-  "num_policies": 2,
   "async_rl": false,
   "serial_mode": false,
   "batched_sampling": false,
@@ -23,7 +23,7 @@
   "rollout": 64,
   "recurrence": 1,
   "shuffle_minibatches": false,
-  "gamma": 0.9999,
   "reward_scale": 1,
   "reward_clip": 1000.0,
   "value_bootstrap": true,
@@ -105,7 +105,7 @@
   "with_wandb": true,
   "wandb_user": "matt-stammers",
   "wandb_project": "sample_factory",
-  "wandb_group": "mujoco_swimmer_2",
   "wandb_job_type": "SF",
   "wandb_tags": [
     "mujoco"
@@ -122,14 +122,14 @@
   "pbt_target_objective": "true_objective",
   "pbt_perturb_min": 1.1,
   "pbt_perturb_max": 1.5,
-  "command_line": "--algo=APPO --env=mujoco_swimmer --experiment=Swimmer --train_dir=./train_dir",
   "cli_args": {
     "algo": "APPO",
-    "env": "mujoco_swimmer",
-    "experiment": "Swimmer",
     "train_dir": "./train_dir"
   },
   "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
   "git_repo_name": "not a git repository",
-  "wandb_unique_id": "Swimmer_20230921_091703_025123"
 }

 {
   "help": false,
   "algo": "APPO",
+  "env": "mujoco_walker",
+  "experiment": "Walker",
   "train_dir": "./train_dir",
+  "restart_behavior": "restart",
   "device": "gpu",
   "seed": null,
+  "num_policies": 1,
   "async_rl": false,
   "serial_mode": false,
   "batched_sampling": false,
   "rollout": 64,
   "recurrence": 1,
   "shuffle_minibatches": false,
+  "gamma": 0.99,
   "reward_scale": 1,
   "reward_clip": 1000.0,
   "value_bootstrap": true,
   "with_wandb": true,
   "wandb_user": "matt-stammers",
   "wandb_project": "sample_factory",
+  "wandb_group": "mujoco_walker",
   "wandb_job_type": "SF",
   "wandb_tags": [
     "mujoco"
   "pbt_target_objective": "true_objective",
   "pbt_perturb_min": 1.1,
   "pbt_perturb_max": 1.5,
+  "command_line": "--algo=APPO --env=mujoco_walker --experiment=Walker --train_dir=./train_dir",
   "cli_args": {
     "algo": "APPO",
+    "env": "mujoco_walker",
+    "experiment": "Walker",
     "train_dir": "./train_dir"
   },
   "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
   "git_repo_name": "not a git repository",
+  "wandb_unique_id": "Walker_20230921_122411_074084"
 }

git.diff CHANGED Viewed

The diff for this file is too large to render. See raw diff

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ceed1c6db2ef39e2fd4b8b6af0424f865fb7ea53b27854d346b3ff977dfe013
-size 5506982

 version https://git-lfs.github.com/spec/v1
+oid sha256:940b40bece17f417d27d892c4412743dd9ef5ede2e7d834a89d67769b91b3db5
+size 18126693

sf_log.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff