IvanHU committed on
Commit
742fbb2
·
verified ·
1 Parent(s): bcde2f3

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +17 -0
  2. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/.metadata +3 -0
  3. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__0_0.distcp +3 -0
  4. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__0_1.distcp +3 -0
  5. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__1_0.distcp +3 -0
  6. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__1_1.distcp +3 -0
  7. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__2_0.distcp +3 -0
  8. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__2_1.distcp +3 -0
  9. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__3_0.distcp +3 -0
  10. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__3_1.distcp +3 -0
  11. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__4_0.distcp +3 -0
  12. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__4_1.distcp +3 -0
  13. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__5_0.distcp +3 -0
  14. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__5_1.distcp +3 -0
  15. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__6_0.distcp +3 -0
  16. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__6_1.distcp +3 -0
  17. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__7_0.distcp +3 -0
  18. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__7_1.distcp +3 -0
  19. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/common.pt +3 -0
  20. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/metadata.json +1 -0
  21. model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -1187,3 +1187,20 @@ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-
1187
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1188
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1189
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1188
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1189
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
1190
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/.metadata filter=lfs diff=lfs merge=lfs -text
1191
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1192
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
1193
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1194
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
1195
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1196
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
1197
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1198
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
1199
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1200
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
1201
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1202
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
1203
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1204
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1205
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1206
+ model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58dc7a1264ac31635652b97a433f2abe94e8124f029cd48ffd7976026367824
3
+ size 923216
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e68f73781b0d0c88700bf6ff560834e6e8957b4c5d201c7408f01375fa0cb040
3
+ size 545101300
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f2212c8a18172a830441ba2001fadd34fac69e8300b6137413992b74ed5e96
3
+ size 545132060
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f135fc9ddf03490cf5e7959ecb1ba99ffd6b6c27c5ae323fd3aa6a386e6d0a18
3
+ size 499379568
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e938f7497df3ff7f2a5a39914c9687e0e6fd101f16fb7a245e438f87df5866f
3
+ size 498618012
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50b08e90dbce1f7c605639781d5fc559d268f2a0ca5756ad3a60d8838c3345e8
3
+ size 498643736
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1d19e20da0b70667279e5fb49ec91a3f66ad74cc03df9058090769b5c6b8e8
3
+ size 499459932
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf43a63406f5afad173e47c11992e4721f1f87cc2b57c123b6b27b57ec9f367e
3
+ size 498478892
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20e6fc4c00a3dd4ea13da93a4ef7b7815e8bc27e3ee6e7bb1056a209f9ec747d
3
+ size 499459932
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:188adacc2d3a77f33bb2914f7c3834e5e114de5432eb343f86029df0951d2e16
3
+ size 498583868
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0491d16374c22658ef913c4155cf1096246ffc6059469e1ad9634c7691659e8d
3
+ size 499403292
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ad4dfe22eb11d908c2ba1034da87fdc6873f4092bc47e5515c7c0d84115d58
3
+ size 544206316
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2123d81daa154cf9549f6c55dc7b1abd6d9d86425f2b299960776f391e0ed1
3
+ size 544049468
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6316d6fe506e08d3dc17a7d337eeb3336ac54de4a827780c57f81106ab565be
3
+ size 498583868
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f38e8a75c5ea749761543b7c75c247ac915997b714c2483f50ee371c5dd92066
3
+ size 499403292
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b74731dc94fbd8d2f1b1bb84b7163ee09666b4fc6a4a577e70dd159bead2a19d
3
+ size 498422252
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adc6706a8451ff2e7d312b9f5b56f8926eeddd8038fe31242a4209691ea6b4b9
3
+ size 498353536
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a514d54dabb303e56b21d5e2625efd03bd5e9d8fde46ef5d8606e1d4e69141e3
3
+ size 18140
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0000001/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/mixtral-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-mlc-0.05-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 1