Upload folder using huggingface_hub
Browse files- .gitattributes +17 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
- model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +2 -0
.gitattributes
CHANGED
|
@@ -661,3 +661,20 @@ model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep
|
|
| 661 |
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 662 |
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 663 |
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 662 |
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 663 |
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-2-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 664 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 665 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 666 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 667 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 668 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 669 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 670 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 671 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 672 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 673 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 674 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 675 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 676 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 677 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 678 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
|
| 679 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 680 |
+
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed2c8f85a35aee1c2b38db7c84ac0cf60178aeb79687bcc7dad33b31c18595bf
|
| 3 |
+
size 1145765
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfcc91b2acb8b0bc6c7099833dc692aa95159b5cc640e17bc07de39d814262cc
|
| 3 |
+
size 687946575
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d709fa4ceab5186933a1402cf7646aba29814f1adb2b751dd44cd1dcf28a027c
|
| 3 |
+
size 687944002
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80185fb5af7f336a6d0916b449199cfb935064d12d674fcd57d9fe1c93187553
|
| 3 |
+
size 687423642
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dd412e4eb0a954d7513fac77ed7dd1d82a39478df7a6c4dc72d77e24a6f6311
|
| 3 |
+
size 687437254
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ceee56697290d7d1f76e2e2b962b1ed9503d1fe4b41d560638aa94585da0313c
|
| 3 |
+
size 687641060
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be4dffb47db226a1160a6a772c2341e2f35b1744dbd293f1d6778c031d25801c
|
| 3 |
+
size 687662457
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6a39d921e7bd8be407b79b7462f83ebce94d3f45d2e98afc4c269a99003ac74
|
| 3 |
+
size 687238284
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03fcc58473a9bc160cadc0c40c91c6e2a72b30a1474c200ccec6a21f6b669912
|
| 3 |
+
size 687579804
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cdc7d1b9ed0fd60b8d37dd0e9c269a195f92c7619ffc810331651e4b08faa9d
|
| 3 |
+
size 721091665
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a88c3fa637acf2c7882317c96bd719074469817eb2fbb1fbeafbf04535eb7020
|
| 3 |
+
size 720935490
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39f398a359179af4d58d59fc012a5e4fc1d4281c80f00ba4d43d3979062363b4
|
| 3 |
+
size 719517224
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e79d98c34f17a1310e1d51a078996bee4befebb1e23583f4d54389cde27f76fe
|
| 3 |
+
size 720935490
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06e52db7cef4fdc106c76b7119c512b214df3b5af3f8ca881f815761294697aa
|
| 3 |
+
size 687238436
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e4bba11b1effcbf9621a661890c7c9813595d7faaa65a70accefdcb6ed568b5
|
| 3 |
+
size 687842980
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42b2f9afffe84ab9ea4f832abf73683dab8b8aadc9ced012a93c7d7823721d4f
|
| 3 |
+
size 686847861
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:515608e37edf5dd0fb01ebb66e1c1860089121f0083c03eca44e71302e2f66bf
|
| 3 |
+
size 686792827
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6713429b63f58ae64e669f2be2759ef9bc7da22e0a3b14b989e7e38c2966de04
|
| 3 |
+
size 19175
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4768
|
model/dev-muon-gate-ts-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025.05.20-00.47.39
|
| 2 |
+
2025.05.20-00.52.40
|