IvanHU commited on
Commit
d673238
·
verified ·
1 Parent(s): e69bb6c

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +17 -0
  2. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -457,3 +457,20 @@ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-
457
  model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
458
  model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
459
  model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
458
  model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
459
  model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
460
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
461
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
462
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
463
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
464
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
465
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
466
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
467
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
468
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
469
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
470
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
471
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
472
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
473
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
474
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
475
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
476
+ model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e62c8b48001a4d4b48b33c0996c90451e9ab374f0cc4046b7ab08a89d91c5a
3
+ size 923222
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d36b8cfcfe850a1529f93c81ebd5cc30afc0009846afccda54cb665b88809a7
3
+ size 545101300
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa070c4b42c5e7c45dc1744a26f5764582b2ba1bbf8f1afe050586a945c84eb4
3
+ size 545132060
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec67cc3b023b76dbfe20e5722be89bca17baf7c9aadfa38e13dc676a9ac5f3b
3
+ size 499379568
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ac3443f5c1ccad7b1c8663b58047e361a9adf81cd69d40d36d9f14f37c8727
3
+ size 498618012
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1e61f93713f20187761cf59018cd019b1ac1937078dafeeb2b4224e8b0f212c
3
+ size 498643736
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2b1e7edd41e1644d0a6acdb071df627e3a27dba0f79a1fd2082e9a224f9a7c
3
+ size 499459932
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b41cb831c9a6813b482fb61a9701b67513e6105f952df10f0abfb2c99e516d
3
+ size 498478892
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f54661a6f1064252d1f45203f47362df4560e5e5f69a063ef618567d83da847
3
+ size 499459932
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c41278aa9ccfe15106c4fb7e6aa08b61ad5747ebf7c01780b0f95a2794f97f
3
+ size 498583868
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d785292a3837ed5309a70504954bfe2904bfa3afbd6cef246779b762f79a24bd
3
+ size 499403292
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd7b01f1ebe14ee7b440d13278c92eb461a171ccd39362ef2296285b3c676c4
3
+ size 544206316
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c87bfb5edba4aa5d0c94501878503dd2f66aff3b5b581ff42fd25a4586d513de
3
+ size 544049468
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9985a024c6275cc205721c020f49212b8cb31a783aec212bccffb4574e72821
3
+ size 498583868
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30129368a1c5babb916905b409b2023345af9d187fb2fb4b94c10f4ab6395ef2
3
+ size 499403292
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d37d62dead068ebd385da21693da5152a40b266bda1f4b43bc0cae10642fe9f
3
+ size 498422252
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0709bbe178ffe6ff0131edf2d4eb82c34a26f9e68edf19b46388182ad65066b5
3
+ size 498353536
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2880399f0d18ce478fe5ff32f93af1561a459394a0caeb626c5d231c04d9087
3
+ size 18012
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-mixtral-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-mlc-0.0001-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768