MattStammers committed on
Commit
31c7758
·
1 Parent(s): 096560b

Upload folder using huggingface_hub

Browse files
.summary/0/events.out.tfevents.1695295454.rhmmedcatt-ProLiant-ML350-Gen10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce04327a3be9212af0bdbd1f8c3ef57a5525dd927b3f79e115604757a389e1fc
3
+ size 739984
README.md CHANGED
@@ -5,22 +5,22 @@ tags:
5
  - reinforcement-learning
6
  - sample-factory
7
  model-index:
8
- - name: APPO
9
  results:
10
  - task:
11
  type: reinforcement-learning
12
  name: reinforcement-learning
13
  dataset:
14
- name: mujoco_swimmer
15
- type: mujoco_swimmer
16
  metrics:
17
  - type: mean_reward
18
- value: 39.18 +/- 2.88
19
  name: mean_reward
20
  verified: false
21
  ---
22
 
23
- A(n) **APPO** model trained on the **mujoco_swimmer** environment.
24
 
25
  This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
26
  Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
@@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r MattStammers/appo-mujoco-s
38
 
39
  To run the model after download, use the `enjoy` script corresponding to this environment:
40
  ```
41
- python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_swimmer --train_dir=./train_dir --experiment=appo-mujoco-swimmer
42
  ```
43
 
44
 
@@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
49
 
50
  To continue training with this model, use the `train` script corresponding to this environment:
51
  ```
52
- python -m sf_examples.mujoco.train_mujoco --algo=APPO --env=mujoco_swimmer --train_dir=./train_dir --experiment=appo-mujoco-swimmer --restart_behavior=resume --train_for_env_steps=10000000000
53
  ```
54
 
55
  Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously stopped.
 
5
  - reinforcement-learning
6
  - sample-factory
7
  model-index:
8
+ - name: ATQC
9
  results:
10
  - task:
11
  type: reinforcement-learning
12
  name: reinforcement-learning
13
  dataset:
14
+ name: mujoco_walker
15
+ type: mujoco_walker
16
  metrics:
17
  - type: mean_reward
18
+ value: 3553.55 +/- 944.12
19
  name: mean_reward
20
  verified: false
21
  ---
22
 
23
+ A(n) **ATQC** model trained on the **mujoco_walker** environment.
24
 
25
  This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
26
  Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
 
38
 
39
  To run the model after download, use the `enjoy` script corresponding to this environment:
40
  ```
41
+ python -m sf_examples.mujoco.enjoy_mujoco --algo=ATQC --env=mujoco_walker --train_dir=./train_dir --experiment=appo-mujoco-swimmer
42
  ```
43
 
44
 
 
49
 
50
  To continue training with this model, use the `train` script corresponding to this environment:
51
  ```
52
+ python -m sf_examples.mujoco.train_mujoco --algo=ATQC --env=mujoco_walker --train_dir=./train_dir --experiment=appo-mujoco-swimmer --restart_behavior=resume --train_for_env_steps=10000000000
53
  ```
54
 
55
  Note: you may have to set `--train_for_env_steps` to a suitably high number, because the experiment resumes from the step count at which it previously stopped.
checkpoint_p0/best_000016776_8589312_reward_4311.623.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:986afd2328fe819d65d8aff66a9fd77c1d5ea1186758b05edb5cf0c761f05c16
3
+ size 83071
checkpoint_p0/checkpoint_000019256_9859072.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:939d75e1b639ef1ac4777d43d2f9a2429ca7789a03ce30faa06b046aad0bce75
3
+ size 83335
checkpoint_p0/checkpoint_000019544_10006528.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd50816ac4986e76951727309652d951f620c5555253c82a889f5ac45fdc4b0
3
+ size 83335
config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "help": false,
3
  "algo": "APPO",
4
- "env": "mujoco_swimmer",
5
- "experiment": "Swimmer",
6
  "train_dir": "./train_dir",
7
- "restart_behavior": "resume",
8
  "device": "gpu",
9
  "seed": null,
10
- "num_policies": 2,
11
  "async_rl": false,
12
  "serial_mode": false,
13
  "batched_sampling": false,
@@ -23,7 +23,7 @@
23
  "rollout": 64,
24
  "recurrence": 1,
25
  "shuffle_minibatches": false,
26
- "gamma": 0.9999,
27
  "reward_scale": 1,
28
  "reward_clip": 1000.0,
29
  "value_bootstrap": true,
@@ -105,7 +105,7 @@
105
  "with_wandb": true,
106
  "wandb_user": "matt-stammers",
107
  "wandb_project": "sample_factory",
108
- "wandb_group": "mujoco_swimmer_2",
109
  "wandb_job_type": "SF",
110
  "wandb_tags": [
111
  "mujoco"
@@ -122,14 +122,14 @@
122
  "pbt_target_objective": "true_objective",
123
  "pbt_perturb_min": 1.1,
124
  "pbt_perturb_max": 1.5,
125
- "command_line": "--algo=APPO --env=mujoco_swimmer --experiment=Swimmer --train_dir=./train_dir",
126
  "cli_args": {
127
  "algo": "APPO",
128
- "env": "mujoco_swimmer",
129
- "experiment": "Swimmer",
130
  "train_dir": "./train_dir"
131
  },
132
  "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
133
  "git_repo_name": "not a git repository",
134
- "wandb_unique_id": "Swimmer_20230921_091703_025123"
135
  }
 
1
  {
2
  "help": false,
3
  "algo": "APPO",
4
+ "env": "mujoco_walker",
5
+ "experiment": "Walker",
6
  "train_dir": "./train_dir",
7
+ "restart_behavior": "restart",
8
  "device": "gpu",
9
  "seed": null,
10
+ "num_policies": 1,
11
  "async_rl": false,
12
  "serial_mode": false,
13
  "batched_sampling": false,
 
23
  "rollout": 64,
24
  "recurrence": 1,
25
  "shuffle_minibatches": false,
26
+ "gamma": 0.99,
27
  "reward_scale": 1,
28
  "reward_clip": 1000.0,
29
  "value_bootstrap": true,
 
105
  "with_wandb": true,
106
  "wandb_user": "matt-stammers",
107
  "wandb_project": "sample_factory",
108
+ "wandb_group": "mujoco_walker",
109
  "wandb_job_type": "SF",
110
  "wandb_tags": [
111
  "mujoco"
 
122
  "pbt_target_objective": "true_objective",
123
  "pbt_perturb_min": 1.1,
124
  "pbt_perturb_max": 1.5,
125
+ "command_line": "--algo=APPO --env=mujoco_walker --experiment=Walker --train_dir=./train_dir",
126
  "cli_args": {
127
  "algo": "APPO",
128
+ "env": "mujoco_walker",
129
+ "experiment": "Walker",
130
  "train_dir": "./train_dir"
131
  },
132
  "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
133
  "git_repo_name": "not a git repository",
134
+ "wandb_unique_id": "Walker_20230921_122411_074084"
135
  }
git.diff CHANGED
The diff for this file is too large to render. See raw diff
 
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ceed1c6db2ef39e2fd4b8b6af0424f865fb7ea53b27854d346b3ff977dfe013
3
- size 5506982
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940b40bece17f417d27d892c4412743dd9ef5ede2e7d834a89d67769b91b3db5
3
+ size 18126693
sf_log.txt CHANGED
The diff for this file is too large to render. See raw diff