continue readout step 500 ppl 92.75
Browse files
- README.md +11 -9
- full_model_continue.pt +3 -0
README.md
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
-
# OneShot full model
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
|
| 6 |
-
-
|
| 7 |
-
-
|
| 8 |
-
-
|
| 9 |
-
|
| 10 |
-
Best readout PPL: 92.75
|
| 11 |
-
Config: d=896, r=320, L=10, vocab=8192
|
|
|
|
| 1 |
+
# OneShot continued full model
|
| 2 |
|
| 3 |
+
Latest continued checkpoint:
|
| 4 |
+
- file: full_model_continue.pt
|
| 5 |
+
- step: 500
|
| 6 |
+
- ppl: 92.7529
|
| 7 |
+
- final: False
|
| 8 |
|
| 9 |
+
Base config:
|
| 10 |
+
- d=896
|
| 11 |
+
- r=320
|
| 12 |
+
- layers=10
|
| 13 |
+
- vocab=8192
|
|
|
|
|
|
full_model_continue.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:443100d60357ade1c83a3949e2ba17c78c4c0aa651af539a28d649e27156d958
|
| 3 |
+
size 643440378
|