IvanHU commited on
Commit
908b022
·
verified ·
1 Parent(s): dd4cc44

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +1 -0
.gitattributes CHANGED
@@ -67,3 +67,20 @@ model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-
67
  model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
68
  model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
69
  model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
68
  model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
69
  model/dev-4k-dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-4096/iter_0009536/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
70
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
71
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
72
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
73
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
74
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
75
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
76
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
77
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
78
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
79
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
80
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
81
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
82
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
83
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
84
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
85
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
86
+ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5888f00203ab3244904e22df31232c5d4b1c1b4ed7f3895d3cff8d4b30a56303
3
+ size 926713
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf118f25020ccc1ac6e25be8960e326a4e3ae93f1dd08189735c0f629c84e803
3
+ size 545224623
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84e893316904d64ac05ec5b5f1582d3941077ae33cb2abf9c66f656cf26435ba
3
+ size 545270878
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0f1f9a02bb60f0014fb8dc4e38c9c9a945afad26fe8568eac0cf5e37ac07f3a
3
+ size 499480569
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97e133c56de62541712bc5edeb1862dbd1b4a1a26753e89ae5d6195e2f5004e8
3
+ size 498719205
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ad0d1318b8b39676a8e49cf5494007e20432d5b4f47c91aecc1c8003f4018fa
3
+ size 498730266
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e8a1f0d5552aca2c625cf6448dc532f7048bc823a1e832d198a116794c98c26
3
+ size 499550585
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99edeaeaf543114193a0b20f9ac5883b06b291d215819c24f4869215799f4c7f
3
+ size 498560261
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc93661559db35ecc3958f49f478fc0679eb4fcbc7365feb6cbf0866526cb4f0
3
+ size 499550585
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91624b170d73f7c8a0ad640a95d997f53a946dd3882862c9aa9d472c7ff9a260
3
+ size 498660791
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ebdc870756cea50ee9206db296dfea7a155d0d5fa0bae5f07faa71ab87f411
3
+ size 499474889
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9348a47f6e3cad5ce33047e941def83c32ae1be503d88df5504c6b74dfa39b
3
+ size 544268949
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c5903764cd3deb117e378e984cb53a69fd1db8e4f595762a64cbb0bee14920
3
+ size 544117569
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76da91bd8e45aea1f7fc1a547933c8dd745aa09409ea18753ae414b3d3f1732c
3
+ size 498660791
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27cdeda1043e022b03d8e406741380b2a38003d988249cc5ee74b1070c4f1255
3
+ size 499474889
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4b114e77d7244626c356ed5ff20b23a4f96c64ed0d285181b70567890b85ffb
3
+ size 498484565
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04c3158ae5c5e500e19f9abfe69d9023ded5284e5299da8c3a57e7ced7dd8d7
3
+ size 498424736
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59bfb725f61a377c1ad2f73051aa3c4255cd8ca8adc11baf4f57c160b0c80b6b
3
+ size 18855
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025.05.20-00.50.13