MattStammers committed on
Commit
aae73f8
·
1 Parent(s): 175dee0

Upload folder using huggingface_hub

Browse files
.summary/0/events.out.tfevents.1694498871.rhmmedcatt-ProLiant-ML350-Gen10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d951d05fff737e7165aa0beced33148be68378ad7271d16e3ea364ce5cd49e05
3
+ size 1271462
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
15
  type: doom_defend_the_line
16
  metrics:
17
  - type: mean_reward
18
- value: 27.60 +/- 6.76
19
  name: mean_reward
20
  verified: false
21
  ---
@@ -53,5 +53,4 @@ python -m <path.to.train.module> --algo=APPO --env=doom_defend_the_line --train_
53
  ```
54
 
55
  Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
56
-
57
- This one was trained for about 44 million timesteps
 
15
  type: doom_defend_the_line
16
  metrics:
17
  - type: mean_reward
18
+ value: 36.30 +/- 4.86
19
  name: mean_reward
20
  verified: false
21
  ---
 
53
  ```
54
 
55
  Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
56
+
 
checkpoint_p0/best_000024273_99422208_reward_36.850.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27cadb19eeb42fad9468d9a4803335d8b87203f8d95210f3bf27ffd08218da58
3
+ size 34928806
checkpoint_p0/checkpoint_000024301_99536896.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c29c986ae13506728c69d872154d3bacda5de7e6116cab23f44a4ac42efea3
3
+ size 34929220
checkpoint_p0/checkpoint_000024416_100007936.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dae3f85a0f697a18698a5712bf7b5d8f7515b198433e6d3f99b49ce2a8e095b
3
+ size 34929220
git.diff CHANGED
@@ -5,10 +5,10 @@ diff --git a/environments/ai_vs_ai/ml-agents b/environments/ai_vs_ai/ml-agents
5
  -Subproject commit 8bcedabd808ffb7097f88b800fc92dea82dfd610
6
  +Subproject commit 8bcedabd808ffb7097f88b800fc92dea82dfd610-dirty
7
  diff --git a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv
8
- index fb7bd62..2f221a2 100644
9
  --- a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv
10
  +++ b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv
11
- @@ -18493,3 +18493,10394 @@ r,l,t
12
  19475.0,10454,121627.848456
13
  8600.0,4558,121633.704827
14
  14975.0,5637,121641.623816
@@ -10403,11 +10403,35 @@ index fb7bd62..2f221a2 100644
10403
  +26150.0,12102,258610.838674
10404
  +15625.0,6467,258619.378055
10405
  +26275.0,11134,258633.935992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10406
  diff --git a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip
10407
  index ade47c0..3e11ac4 100644
10408
  Binary files a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip and b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip differ
10409
  diff --git a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz
10410
- index c09117b..37b1199 100644
10411
  Binary files a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz and b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz differ
10412
  diff --git a/environments/sample_factory/doom_healthgathering.ipynb b/environments/sample_factory/doom_healthgathering.ipynb
10413
  index a7be1b5..84985a6 100644
@@ -10432,7 +10456,7 @@ index a7be1b5..84985a6 100644
10432
  "vscode": {
10433
  "interpreter": {
10434
  diff --git a/environments/sample_factory/train_dir/default_experiment/README.md b/environments/sample_factory/train_dir/default_experiment/README.md
10435
- index 67b28b9..aa48b75 100644
10436
  --- a/environments/sample_factory/train_dir/default_experiment/README.md
10437
  +++ b/environments/sample_factory/train_dir/default_experiment/README.md
10438
  @@ -11,16 +11,16 @@ model-index:
@@ -10446,7 +10470,7 @@ index 67b28b9..aa48b75 100644
10446
  metrics:
10447
  - type: mean_reward
10448
  - value: 9.02 +/- 3.07
10449
- + value: 13.00 +/- 4.77
10450
  name: mean_reward
10451
  verified: false
10452
  ---
@@ -10461,21 +10485,8 @@ index 67b28b9..aa48b75 100644
10461
  After installing Sample-Factory, download the model with:
10462
  ```
10463
  -python -m sample_factory.huggingface.load_from_hub -r MattStammers/rl_course_vizdoom_health_gathering_supreme
10464
- +python -m sample_factory.huggingface.load_from_hub -r MattStammers/vizdoom_deathmatch
10465
  ```
10466
 
10467
 
10468
- @@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r MattStammers/rl_course_viz
10469
-
10470
- To run the model after download, use the `enjoy` script corresponding to this environment:
10471
- ```
10472
- -python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
10473
- +python -m <path.to.enjoy.module> --algo=APPO --env=doom_defend_the_line --train_dir=./train_dir --experiment=vizdoom_deathmatch
10474
- ```
10475
-
10476
-
10477
- @@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
10478
-
10479
- To continue training with this model, use the `train` script corresponding to this environment:
10480
- ```
10481
- -python -m <path.
 
5
  -Subproject commit 8bcedabd808ffb7097f88b800fc92dea82dfd610
6
  +Subproject commit 8bcedabd808ffb7097f88b800fc92dea82dfd610-dirty
7
  diff --git a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv
8
+ index fb7bd62..6833eb5 100644
9
  --- a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv
10
  +++ b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/0.monitor.csv
11
+ @@ -18493,3 +18493,10418 @@ r,l,t
12
  19475.0,10454,121627.848456
13
  8600.0,4558,121633.704827
14
  14975.0,5637,121641.623816
 
10403
  +26150.0,12102,258610.838674
10404
  +15625.0,6467,258619.378055
10405
  +26275.0,11134,258633.935992
10406
+ +25825.0,10522,258647.880362
10407
+ +7925.0,4234,258653.704263
10408
+ +8525.0,4003,258661.50221
10409
+ +11700.0,5208,258699.564736
10410
+ +4200.0,2627,258702.930337
10411
+ +22475.0,10162,258715.957236
10412
+ +26300.0,11499,258729.884603
10413
+ +15575.0,5822,258736.855771
10414
+ +18650.0,8102,258746.582519
10415
+ +22750.0,10352,258759.048396
10416
+ +18950.0,8567,258769.403146
10417
+ +14975.0,5233,258775.717932
10418
+ +15400.0,6507,258783.638297
10419
+ +22625.0,11382,258797.533949
10420
+ +22825.0,11932,258811.996937
10421
+ +22400.0,8667,258853.620059
10422
+ +26400.0,11435,258867.438301
10423
+ +23025.0,10587,258880.314926
10424
+ +22700.0,10092,258892.511546
10425
+ +25975.0,12459,258907.739298
10426
+ +11450.0,4318,258913.037247
10427
+ +8450.0,4712,258918.739674
10428
+ +22575.0,10677,258932.165056
10429
+ +22650.0,10417,258945.528833
10430
  diff --git a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip
10431
  index ade47c0..3e11ac4 100644
10432
  Binary files a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip and b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/best_model.zip differ
10433
  diff --git a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz
10434
+ index c09117b..e2541a5 100644
10435
  Binary files a/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz and b/environments/atari/model/qrdqn/QbertNoFrameskip-v4_6/evaluations.npz differ
10436
  diff --git a/environments/sample_factory/doom_healthgathering.ipynb b/environments/sample_factory/doom_healthgathering.ipynb
10437
  index a7be1b5..84985a6 100644
 
10456
  "vscode": {
10457
  "interpreter": {
10458
  diff --git a/environments/sample_factory/train_dir/default_experiment/README.md b/environments/sample_factory/train_dir/default_experiment/README.md
10459
+ index 67b28b9..7036292 100644
10460
  --- a/environments/sample_factory/train_dir/default_experiment/README.md
10461
  +++ b/environments/sample_factory/train_dir/default_experiment/README.md
10462
  @@ -11,16 +11,16 @@ model-index:
 
10470
  metrics:
10471
  - type: mean_reward
10472
  - value: 9.02 +/- 3.07
10473
+ + value: 27.60 +/- 6.76
10474
  name: mean_reward
10475
  verified: false
10476
  ---
 
10485
  After installing Sample-Factory, download the model with:
10486
  ```
10487
  -python -m sample_factory.huggingface.load_from_hub -r MattStammers/rl_course_vizdoom_health_gathering_supreme
10488
+ +python -m sample_factory.huggingface.load_from_hub -r MattStammers/vizdoom_defend_the_line
10489
  ```
10490
 
10491
 
10492
+ @@ -38,7
 
 
 
 
 
 
 
 
 
 
 
 
 
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68010b7f87dcdf1aec4007f96a304802fb68ca960d69ca4774e48d5c2db016db
3
- size 8759394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6de77ab1d23fa75989d5b94c21c32c7c8d708a737db4bbf8db7ff00339a8a95a
3
+ size 9216487
sf_log.txt CHANGED
The diff for this file is too large to render. See raw diff