IvanHU committed on
Commit
d00fd64
·
verified ·
1 Parent(s): 9ccbe24

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes CHANGED
@@ -219,3 +219,20 @@ model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-
219
  model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
220
  model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
221
  model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
220
  model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
221
  model/dev-embdev-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
222
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
223
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
224
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
225
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
226
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
227
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
228
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
229
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
230
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
231
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
232
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
233
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
234
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
235
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
236
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
237
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
238
+ model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde0d4d94af6833dfaecbaaf9e6d6265b5d1da2acd41506a1c37cbc757da3508
3
+ size 929378
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99eba1eee363fd4705ec03b553391cffc5230e7ccff904e8145831be5dde266f
3
+ size 545234085
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35de4aa4662ce12d1d1b07193b13db34d48eaf2dbe8ece835c1197b0d2ea5c29
3
+ size 545280340
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ca64460b45ee711d3969c396f5874d94239e214f77a5313a8a2a99bcacfc3ae
3
+ size 499480569
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d94972bb1437ebfd5ca1a31355aed7879afc19d0a13bcba922598970b5f1629
3
+ size 498719205
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fa3780d313393785ba88dfeceb2c2cacceb0ab6948bf9752098ddd26369d343
3
+ size 498730266
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb6e98bad61024feccbe359dba07d84223fedaf6d4165f89765cca7e88d0886
3
+ size 499550585
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d5881e7f7af394c804b0bd409fbe68f01631c5ebe898452148d3a347d565f6b
3
+ size 498560261
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c456fe7427257edf6211758a37929b78eeda467e9cd6d8e4f0f6274ffe1857
3
+ size 499550585
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0ed5ea94402eab54789b7f15348691f93d55f6a0cada58e83a5709284a45033
3
+ size 498660791
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6de230aa48a97adb36399266bd21c350f82069cdfb05c50feaa41f07d6feca
3
+ size 499474889
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca937cde4cc508945bb0a62fafa30b34809924e835fdfe922a45cec5fa6bc03f
3
+ size 544268949
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f481f1a9509adc9e8e5b5441ccebedb585b1b725497a090c5c9b8d7a767ae0
3
+ size 544117569
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05cd0b5138290ee38434d348575357bc091f55384930cf0374c7472fa45e411
3
+ size 498660791
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d933e1e3cbf21aa29a42cd891f23dfee41e98cf31e30cd06aa3b0876d7b195b4
3
+ size 499474889
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:600a69832d7746751a3dbfe283239fa23f5e34336ef5973f3cd22f5082e5159b
3
+ size 498484565
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df91680d070c71d7e343f6f2cf08a341c9fee9a6748dfb25e6f64a89ca6b7e5f
3
+ size 498424736
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd2b4460c8ce1eee0f95e1747a688f071436a67fcee43ae4470da589276c1f6
3
+ size 18791
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-embdev-loss0.01-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025.05.09-12.34.56