Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes
CHANGED
|
@@ -67,3 +67,20 @@ model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-
|
|
| 67 |
model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 68 |
model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 69 |
model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 68 |
model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 69 |
model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5888f00203ab3244904e22df31232c5d4b1c1b4ed7f3895d3cff8d4b30a56303
|
| 3 |
+
size 926713
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf118f25020ccc1ac6e25be8960e326a4e3ae93f1dd08189735c0f629c84e803
|
| 3 |
+
size 545224623
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84e893316904d64ac05ec5b5f1582d3941077ae33cb2abf9c66f656cf26435ba
|
| 3 |
+
size 545270878
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0f1f9a02bb60f0014fb8dc4e38c9c9a945afad26fe8568eac0cf5e37ac07f3a
|
| 3 |
+
size 499480569
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97e133c56de62541712bc5edeb1862dbd1b4a1a26753e89ae5d6195e2f5004e8
|
| 3 |
+
size 498719205
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ad0d1318b8b39676a8e49cf5494007e20432d5b4f47c91aecc1c8003f4018fa
|
| 3 |
+
size 498730266
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e8a1f0d5552aca2c625cf6448dc532f7048bc823a1e832d198a116794c98c26
|
| 3 |
+
size 499550585
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99edeaeaf543114193a0b20f9ac5883b06b291d215819c24f4869215799f4c7f
|
| 3 |
+
size 498560261
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc93661559db35ecc3958f49f478fc0679eb4fcbc7365feb6cbf0866526cb4f0
|
| 3 |
+
size 499550585
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91624b170d73f7c8a0ad640a95d997f53a946dd3882862c9aa9d472c7ff9a260
|
| 3 |
+
size 498660791
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02ebdc870756cea50ee9206db296dfea7a155d0d5fa0bae5f07faa71ab87f411
|
| 3 |
+
size 499474889
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d9348a47f6e3cad5ce33047e941def83c32ae1be503d88df5504c6b74dfa39b
|
| 3 |
+
size 544268949
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94c5903764cd3deb117e378e984cb53a69fd1db8e4f595762a64cbb0bee14920
|
| 3 |
+
size 544117569
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76da91bd8e45aea1f7fc1a547933c8dd745aa09409ea18753ae414b3d3f1732c
|
| 3 |
+
size 498660791
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27cdeda1043e022b03d8e406741380b2a38003d988249cc5ee74b1070c4f1255
|
| 3 |
+
size 499474889
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4b114e77d7244626c356ed5ff20b23a4f96c64ed0d285181b70567890b85ffb
|
| 3 |
+
size 498484565
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b04c3158ae5c5e500e19f9abfe69d9023ded5284e5299da8c3a57e7ced7dd8d7
|
| 3 |
+
size 498424736
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59bfb725f61a377c1ad2f73051aa3c4255cd8ca8adc11baf4f57c160b0c80b6b
|
| 3 |
+
size 18855
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2025.05.20-00.50.13
|