Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes
CHANGED
|
@@ -525,3 +525,20 @@ model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-
|
|
| 525 |
model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 526 |
model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 527 |
model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 526 |
model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 527 |
model/dev-moe010-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 528 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 529 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 530 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 531 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 532 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 533 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 534 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 535 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 536 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 537 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 538 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 539 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 540 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 541 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 542 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 543 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 544 |
+
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81c1f6cbf2d192bea9cbe02ae6980525c8b8835079a69a88098b54c2d17889f8
|
| 3 |
+
size 1016253
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e77783fb03d439af5c3a544cba78e0f1cc37bb30870e5a465c3ee672732d8793
|
| 3 |
+
size 513801260
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18c6dab30b222cf2c821fe699ce220788b4f3ff25cea2f44152b6ed913afa9ee
|
| 3 |
+
size 513832104
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21c892a52ca869d463b8a1ddf28fe4da0e62d885574dad663132a5c8961b08c5
|
| 3 |
+
size 467744288
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa434b2ed2bdd1051ca37a4ea2940fac99e4e341ef978e6a6f3889ca8ebf74c3
|
| 3 |
+
size 468377732
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58122ff803d43572659457aba8624212da7fbcc646b9c0473b67443a8521f06e
|
| 3 |
+
size 467906848
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee46144b555f9079ac2b69d4254846700cf7a30c72faf36dfbbe3e1288189c49
|
| 3 |
+
size 468361784
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c768c8109a3c8cf7a30a804afc8126436eda11392b6a5bce55fa8d1bf40cfbd
|
| 3 |
+
size 467756396
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b86fe2fdd64fd2e6d663423233b79d02bfc3b30272e1fa3d67155f8f6de3c7c
|
| 3 |
+
size 468361784
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2516c253f758abee8742ceb86ab5901fcbe9256ad11f6c95c68777dff4b73e69
|
| 3 |
+
size 467854928
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:463511654f02d99e8b694881ec2b7966c1ceb8df8562eb2f11fe116c1593de45
|
| 3 |
+
size 468309864
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f67c1c2d8e15085b4e1c557150840b729c8c24af5f7b67363bdd9ecf0fe4ddb4
|
| 3 |
+
size 512325276
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b6f92e3c6eee8005432369cd08caab6db9be9e50d39fd00179cb2e80ed10b75
|
| 3 |
+
size 513331692
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c1f12176903211a00afc994820dddbba6d4e01c1446b27d4b2c38bfa38ac28b
|
| 3 |
+
size 467871384
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4b7f39c9ef05c9a9e5a50bbc14331ed27821af1062a78e64c09ed69484fc5cc
|
| 3 |
+
size 468309864
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7628936ebe9b5c96d37061f5b3babf2a858e697520d8ade75c661f7a38e7b90e
|
| 3 |
+
size 467704476
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c732aebb30aaf0f78a6ce10d7e58d66680cf0829729d66068f7681bb47884069
|
| 3 |
+
size 467522252
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4baf4d75045dd1fad59b576b994b7dc0fff95fd34ecf2a2659aa12e1d94395c
|
| 3 |
+
size 18140
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/dev-moe110-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2025.05.03-23.18.32
|