IvanHU commited on
Commit
7d376b7
·
verified ·
1 Parent(s): 7a76c9f

Upload folder using huggingface_hub

Browse files
Files changed (38) hide show
  1. .gitattributes +33 -0
  2. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_0.distcp +3 -0
  6. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_1.distcp +3 -0
  7. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_0.distcp +3 -0
  8. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_1.distcp +3 -0
  9. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_0.distcp +3 -0
  10. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_1.distcp +3 -0
  11. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_0.distcp +3 -0
  12. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_1.distcp +3 -0
  13. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_0.distcp +3 -0
  14. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_1.distcp +3 -0
  15. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_0.distcp +3 -0
  16. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_1.distcp +3 -0
  17. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  18. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  19. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  20. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  21. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  22. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  23. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  24. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  25. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  26. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  27. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  28. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  29. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  30. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  31. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_0.distcp +3 -0
  32. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp +3 -0
  33. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp +3 -0
  34. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp +3 -0
  35. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/common.pt +3 -0
  36. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/metadata.json +1 -0
  37. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  38. model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/linked_runs.txt +6 -0
.gitattributes CHANGED
@@ -84,3 +84,36 @@ model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1
84
  model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
85
  model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
86
  model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
85
  model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
86
  model/dev-a3e-dsv3-0.5b-q16-kv2-ep-16-sep-0-top3-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
87
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
88
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
89
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
90
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_0.distcp filter=lfs diff=lfs merge=lfs -text
91
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_1.distcp filter=lfs diff=lfs merge=lfs -text
92
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_0.distcp filter=lfs diff=lfs merge=lfs -text
93
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_1.distcp filter=lfs diff=lfs merge=lfs -text
94
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_0.distcp filter=lfs diff=lfs merge=lfs -text
95
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_1.distcp filter=lfs diff=lfs merge=lfs -text
96
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_0.distcp filter=lfs diff=lfs merge=lfs -text
97
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_1.distcp filter=lfs diff=lfs merge=lfs -text
98
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_0.distcp filter=lfs diff=lfs merge=lfs -text
99
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_1.distcp filter=lfs diff=lfs merge=lfs -text
100
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_0.distcp filter=lfs diff=lfs merge=lfs -text
101
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_1.distcp filter=lfs diff=lfs merge=lfs -text
102
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
103
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
104
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
105
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
106
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
107
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
108
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
109
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
110
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
111
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
112
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
113
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
114
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
115
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
116
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_0.distcp filter=lfs diff=lfs merge=lfs -text
117
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
118
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
119
+ model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e927dd36e3451cbcc7727932e492f1bb5302141841337b79fa54bb9c0f47b3
3
+ size 3062220
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc958bf84a22f674e9f3d3983498960c6dc5ec731d5be3be19a1736acfc27da
3
+ size 1506388620
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9e53be0bbb6926e5a6e81cad8f18268d6398607f57486773bebd3e3bebb4a60
3
+ size 1506388312
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54fe6342943e4f41f9d3b7b3861726c6721f40e8a8c528153aecd5eed32cd3b2
3
+ size 1445880040
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6240b1de621eb55c866728ddd6c436c173d5218dfd4171c527e8a91375c29d2f
3
+ size 1446938540
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eb4b6a157a0a9e94ee2c31e9440627745f80a72e23341431930fd3c9af2e797
3
+ size 1445880040
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3eb236f3a14dabc7cc776f8f5bb374000ce87eca0cbcd817bd478d98e8c313b
3
+ size 1446610860
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eb09c90591c95f35ca6c4c37f368f5523e335a23079ed304da847ab6ebdbf40
3
+ size 1445880040
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2734b1cd1dd1b9ffa87760c9ad13295a33bbb105e1224898f2923b2288e880c5
3
+ size 1446938540
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d73bb395c97f036940dfdadbff81794a8ecee366c5a7c75e6401f959278f68
3
+ size 1445880040
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:217807ea6ae7e7c9f63263f0da6802f734aa703e288ecbc217b12cf0402cc874
3
+ size 1445298960
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0293cb958fa52b9fbe3ef2b66793309697938c6d19726e0f0adeb072c7ac69
3
+ size 1445880040
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25850d3075b0986f17023207955c49f3903f56891a15f8e9ed47fa599282a19c
3
+ size 1446938540
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd390713892d7575fcaa89f80a87281907402341af5c59a1ac104c55160ac654
3
+ size 1445880040
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:901c55d3c4514cf4f8f77e137fd454935219d7e48ec504cc4fa6b252e49c361c
3
+ size 1445298960
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7637c919e66aeb0e2ef2a968d58432f3efc78b3d351b04f632580463b91fa841
3
+ size 1445875420
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99dbf962bd6b857d789d9b41e4d4f97eacee8824edeb0220f6cbaad903dd107
3
+ size 1446674212
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7985d1c949485bad32bf76937684a98fd24cb3ab6e4cb7af801c716a4f50823c
3
+ size 1446896000
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109c3a7874d5afea0701bc2236860dbddcaafaecbe7ebe871a1accf46c6be217
3
+ size 1446851700
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7bae16d1337f41527e678fa8ac205d0e8a9fcb7cab2892040e7de0c0883e95
3
+ size 1445691480
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab95d21303ab5a357357c2238efaf8099653be0d603a5cf22024eef541c25d14
3
+ size 1445578200
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a565d4bb3f1ceccb06cdd1cc50f8377d42510be609f55fc408e5190c53e6f736
3
+ size 1446331240
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76cd721be59674e48e04088ae298960826ae1a90aa6cf97ec3df2122b623e6b
3
+ size 1446373240
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ddfc6429b75e129ba5efc03cef8994171a99fd22bbce36903617adffa94ed16
3
+ size 1445720300
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e57f61e34e2330085388247ce655081e7398aef3556ddf5f8f4afb264ca0b84
3
+ size 1445630940
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00988ce5c9da9ee38de575ba239a17af7ce5acaf595b5280ac190a46829d61fb
3
+ size 1446304480
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d009f1355e1540d1a50fb47fb99f287f92cabeb5ea30f728b39b1a86dbf99c2
3
+ size 1446258620
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3442e88ee547f22f37720256fa51ac24699d5999afb9ea40ebf02dc0fd46c46
3
+ size 1445974440
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd5175e5a18336f416e00673c95c66e2a537bbaab2c9c877be46cccc778b7e2
3
+ size 1445393360
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb04b7986799a5b93189b7cb1169329be9551e2535de97d4040fe45a9daef642
3
+ size 1446210080
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1c3387c56aae7a77bf21641c0145435771f2080144f0bf148c6c5d94f4f38a
3
+ size 1446174840
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5d3d8633be6c0d4ae1e1dfdf884e71a3bd31243ec6639025e554a760fcdad1c
3
+ size 1503359620
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b0ecf624b55310ec8d013ec5db38c8bd41d6339d0cd25caa3ad74623be1152
3
+ size 1505780640
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0785c67fe76384f90c81095e56a398ce759a5a1421c9637ab0cebd135592c1
3
+ size 18140
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-auxfree-3b-q10-kv2-ep-64-sep-2-top6-cf-0-bias-1e-3-bf16-ep8-mp2-pp1-lr-7.8e-4-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ baf4d8f6-cba0-4eb3-b7bb-4ae31357244a
2
+ baf4d8f6-cba0-4eb3-b7bb-4ae31357244a
3
+ dbda828f-e65b-40d8-a277-2f8102c35ce7
4
+ dbda828f-e65b-40d8-a277-2f8102c35ce7
5
+ a627df8f-d7bb-4951-9f2a-166fb8104b64
6
+ a627df8f-d7bb-4951-9f2a-166fb8104b64