Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/.metadata +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__0_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__0_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__1_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__1_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__2_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__2_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__3_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__3_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__4_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__4_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__5_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__5_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__6_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__6_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__7_0.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__7_1.distcp +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/common.pt +3 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/metadata.json +1 -0
- model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes
CHANGED
|
@@ -1153,3 +1153,20 @@ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-
|
|
| 1153 |
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1154 |
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1155 |
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1153 |
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1154 |
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1155 |
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1156 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 1157 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1158 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1159 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1160 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1161 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1162 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1163 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1164 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1165 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1166 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1167 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1168 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1169 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1170 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1171 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 1172 |
+
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecb1477f33bfb4a0dc663fab76a087ce8533d0fd875da17d10c83542cd562e6e
|
| 3 |
+
size 926500
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79c99f26fc67f8c8ec1bcad30684947d4e8cf06f9e121494559b081f74bb96b1
|
| 3 |
+
size 545101300
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69a2a97a97a311d93dc24610bd1a9998f21bb849713ca195838f07ab1ee12334
|
| 3 |
+
size 545132060
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f17ccf7b699da4311b246ea025c3a5ae27598b546f20c67a11d1b2004f1055d
|
| 3 |
+
size 499379568
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c7f6ce36d2472236084b66ec8bf07699626d7f5d9cb47982d2ba55b77adda8d
|
| 3 |
+
size 498618012
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecdc812c8997d414ea9897df8de95cde7b9d7a6b30cbce094b017d7dcdd152b0
|
| 3 |
+
size 498643736
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:918109b0558b6ccb50e36d4922ec94ebd48bdae9763e7d7220dfc79e89fd372b
|
| 3 |
+
size 499459932
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76dd465bfd2b9e52c9666ab60e1866c5a221d0d8c3e559099a8a7b3cc2bf1a1f
|
| 3 |
+
size 498478892
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:302043e58d6e99eaddc46f7bb2e7a4a1fb3ac66cffe0e49ebb993788ff7e1a39
|
| 3 |
+
size 499459932
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b610e1589caf9b9566cc5797375d3778f8ac1331e7745e562d555f195ba40e1
|
| 3 |
+
size 498591332
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f555bc45312e6de7d2552501179be6b515c778554c9a5f66e7778dcfd2c2ac0
|
| 3 |
+
size 499403292
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2ad4dfe22eb11d908c2ba1034da87fdc6873f4092bc47e5515c7c0d84115d58
|
| 3 |
+
size 544206316
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90e60b4f002fddcdff9639d7b8bf12f8986f02d6ab326b3cb77bd6de19a0310c
|
| 3 |
+
size 544049468
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7838f6c40e3bd4e768eb9eeb1c46f0d5a94468f7a7bab2b05d1b09dbea09841
|
| 3 |
+
size 498591332
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3439d3392ee8a9d61df913a91a56ec861621306b92fb9b8cc7be4034c5a81978
|
| 3 |
+
size 499403292
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:899fac6678828660fe440e273ab7ab3b78ce529b7eafeaa252aff86e0fc164c6
|
| 3 |
+
size 498422252
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06f5bc217fe3e8ab9263396a96b5df0288e68d0a94b79212a3887bc955320573
|
| 3 |
+
size 498353536
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:512c7dac53823be1c23301250852448c0c47899aa02348f7ec25251e7c864a7a
|
| 3 |
+
size 18140
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0000001/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1
|