Commit
·
31c7758
1
Parent(s):
096560b
Upload folder using huggingface_hub
Browse files
- .summary/0/events.out.tfevents.1695295454.rhmmedcatt-ProLiant-ML350-Gen10 +3 -0
- README.md +7 -7
- checkpoint_p0/best_000016776_8589312_reward_4311.623.pth +3 -0
- checkpoint_p0/checkpoint_000019256_9859072.pth +3 -0
- checkpoint_p0/checkpoint_000019544_10006528.pth +3 -0
- config.json +10 -10
- git.diff +0 -0
- replay.mp4 +2 -2
- sf_log.txt +0 -0
.summary/0/events.out.tfevents.1695295454.rhmmedcatt-ProLiant-ML350-Gen10
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce04327a3be9212af0bdbd1f8c3ef57a5525dd927b3f79e115604757a389e1fc
|
| 3 |
+
size 739984
|
README.md
CHANGED
|
@@ -5,22 +5,22 @@ tags:
|
|
| 5 |
- reinforcement-learning
|
| 6 |
- sample-factory
|
| 7 |
model-index:
|
| 8 |
-
- name:
|
| 9 |
results:
|
| 10 |
- task:
|
| 11 |
type: reinforcement-learning
|
| 12 |
name: reinforcement-learning
|
| 13 |
dataset:
|
| 14 |
-
name:
|
| 15 |
-
type:
|
| 16 |
metrics:
|
| 17 |
- type: mean_reward
|
| 18 |
-
value:
|
| 19 |
name: mean_reward
|
| 20 |
verified: false
|
| 21 |
---
|
| 22 |
|
| 23 |
-
A(n) **
|
| 24 |
|
| 25 |
This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
|
| 26 |
Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
|
|
@@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r MattStammers/appo-mujoco-s
|
|
| 38 |
|
| 39 |
To run the model after download, use the `enjoy` script corresponding to this environment:
|
| 40 |
```
|
| 41 |
-
python -m sf_examples.mujoco.enjoy_mujoco --algo=
|
| 42 |
```
|
| 43 |
|
| 44 |
|
|
@@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
|
|
| 49 |
|
| 50 |
To continue training with this model, use the `train` script corresponding to this environment:
|
| 51 |
```
|
| 52 |
-
python -m sf_examples.mujoco.train_mujoco --algo=
|
| 53 |
```
|
| 54 |
|
| 55 |
Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
|
|
|
|
| 5 |
- reinforcement-learning
|
| 6 |
- sample-factory
|
| 7 |
model-index:
|
| 8 |
+
- name: ATQC
|
| 9 |
results:
|
| 10 |
- task:
|
| 11 |
type: reinforcement-learning
|
| 12 |
name: reinforcement-learning
|
| 13 |
dataset:
|
| 14 |
+
name: mujoco_walker
|
| 15 |
+
type: mujoco_walker
|
| 16 |
metrics:
|
| 17 |
- type: mean_reward
|
| 18 |
+
value: 3553.55 +/- 944.12
|
| 19 |
name: mean_reward
|
| 20 |
verified: false
|
| 21 |
---
|
| 22 |
|
| 23 |
+
A(n) **ATQC** model trained on the **mujoco_walker** environment.
|
| 24 |
|
| 25 |
This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
|
| 26 |
Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
|
|
|
|
| 38 |
|
| 39 |
To run the model after download, use the `enjoy` script corresponding to this environment:
|
| 40 |
```
|
| 41 |
+
python -m sf_examples.mujoco.enjoy_mujoco --algo=ATQC --env=mujoco_walker --train_dir=./train_dir --experiment=appo-mujoco-swimmer
|
| 42 |
```
|
| 43 |
|
| 44 |
|
|
|
|
| 49 |
|
| 50 |
To continue training with this model, use the `train` script corresponding to this environment:
|
| 51 |
```
|
| 52 |
+
python -m sf_examples.mujoco.train_mujoco --algo=ATQC --env=mujoco_walker --train_dir=./train_dir --experiment=appo-mujoco-swimmer --restart_behavior=resume --train_for_env_steps=10000000000
|
| 53 |
```
|
| 54 |
|
| 55 |
Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
|
checkpoint_p0/best_000016776_8589312_reward_4311.623.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:986afd2328fe819d65d8aff66a9fd77c1d5ea1186758b05edb5cf0c761f05c16
|
| 3 |
+
size 83071
|
checkpoint_p0/checkpoint_000019256_9859072.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:939d75e1b639ef1ac4777d43d2f9a2429ca7789a03ce30faa06b046aad0bce75
|
| 3 |
+
size 83335
|
checkpoint_p0/checkpoint_000019544_10006528.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cd50816ac4986e76951727309652d951f620c5555253c82a889f5ac45fdc4b0
|
| 3 |
+
size 83335
|
config.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"help": false,
|
| 3 |
"algo": "APPO",
|
| 4 |
-
"env": "
|
| 5 |
-
"experiment": "
|
| 6 |
"train_dir": "./train_dir",
|
| 7 |
-
"restart_behavior": "
|
| 8 |
"device": "gpu",
|
| 9 |
"seed": null,
|
| 10 |
-
"num_policies":
|
| 11 |
"async_rl": false,
|
| 12 |
"serial_mode": false,
|
| 13 |
"batched_sampling": false,
|
|
@@ -23,7 +23,7 @@
|
|
| 23 |
"rollout": 64,
|
| 24 |
"recurrence": 1,
|
| 25 |
"shuffle_minibatches": false,
|
| 26 |
-
"gamma": 0.
|
| 27 |
"reward_scale": 1,
|
| 28 |
"reward_clip": 1000.0,
|
| 29 |
"value_bootstrap": true,
|
|
@@ -105,7 +105,7 @@
|
|
| 105 |
"with_wandb": true,
|
| 106 |
"wandb_user": "matt-stammers",
|
| 107 |
"wandb_project": "sample_factory",
|
| 108 |
-
"wandb_group": "
|
| 109 |
"wandb_job_type": "SF",
|
| 110 |
"wandb_tags": [
|
| 111 |
"mujoco"
|
|
@@ -122,14 +122,14 @@
|
|
| 122 |
"pbt_target_objective": "true_objective",
|
| 123 |
"pbt_perturb_min": 1.1,
|
| 124 |
"pbt_perturb_max": 1.5,
|
| 125 |
-
"command_line": "--algo=APPO --env=
|
| 126 |
"cli_args": {
|
| 127 |
"algo": "APPO",
|
| 128 |
-
"env": "
|
| 129 |
-
"experiment": "
|
| 130 |
"train_dir": "./train_dir"
|
| 131 |
},
|
| 132 |
"git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
|
| 133 |
"git_repo_name": "not a git repository",
|
| 134 |
-
"wandb_unique_id": "
|
| 135 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"help": false,
|
| 3 |
"algo": "APPO",
|
| 4 |
+
"env": "mujoco_walker",
|
| 5 |
+
"experiment": "Walker",
|
| 6 |
"train_dir": "./train_dir",
|
| 7 |
+
"restart_behavior": "restart",
|
| 8 |
"device": "gpu",
|
| 9 |
"seed": null,
|
| 10 |
+
"num_policies": 1,
|
| 11 |
"async_rl": false,
|
| 12 |
"serial_mode": false,
|
| 13 |
"batched_sampling": false,
|
|
|
|
| 23 |
"rollout": 64,
|
| 24 |
"recurrence": 1,
|
| 25 |
"shuffle_minibatches": false,
|
| 26 |
+
"gamma": 0.99,
|
| 27 |
"reward_scale": 1,
|
| 28 |
"reward_clip": 1000.0,
|
| 29 |
"value_bootstrap": true,
|
|
|
|
| 105 |
"with_wandb": true,
|
| 106 |
"wandb_user": "matt-stammers",
|
| 107 |
"wandb_project": "sample_factory",
|
| 108 |
+
"wandb_group": "mujoco_walker",
|
| 109 |
"wandb_job_type": "SF",
|
| 110 |
"wandb_tags": [
|
| 111 |
"mujoco"
|
|
|
|
| 122 |
"pbt_target_objective": "true_objective",
|
| 123 |
"pbt_perturb_min": 1.1,
|
| 124 |
"pbt_perturb_max": 1.5,
|
| 125 |
+
"command_line": "--algo=APPO --env=mujoco_walker --experiment=Walker --train_dir=./train_dir",
|
| 126 |
"cli_args": {
|
| 127 |
"algo": "APPO",
|
| 128 |
+
"env": "mujoco_walker",
|
| 129 |
+
"experiment": "Walker",
|
| 130 |
"train_dir": "./train_dir"
|
| 131 |
},
|
| 132 |
"git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
|
| 133 |
"git_repo_name": "not a git repository",
|
| 134 |
+
"wandb_unique_id": "Walker_20230921_122411_074084"
|
| 135 |
}
|
git.diff
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
replay.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:940b40bece17f417d27d892c4412743dd9ef5ede2e7d834a89d67769b91b3db5
|
| 3 |
+
size 18126693
|
sf_log.txt
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|