IvanHU commited on
Commit
af68eea
·
verified ·
1 Parent(s): 36cc7cc

Upload folder using huggingface_hub

Browse files
Files changed (37) hide show
  1. .gitattributes +33 -0
  2. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_0.distcp +3 -0
  6. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_1.distcp +3 -0
  7. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_0.distcp +3 -0
  8. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_1.distcp +3 -0
  9. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_0.distcp +3 -0
  10. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_1.distcp +3 -0
  11. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_0.distcp +3 -0
  12. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_1.distcp +3 -0
  13. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_0.distcp +3 -0
  14. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_1.distcp +3 -0
  15. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_0.distcp +3 -0
  16. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_1.distcp +3 -0
  17. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  18. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  19. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  20. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  21. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  22. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  23. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  24. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  25. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  26. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  27. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  28. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  29. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  30. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  31. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_0.distcp +3 -0
  32. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp +3 -0
  33. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp +3 -0
  34. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp +3 -0
  35. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/common.pt +3 -0
  36. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/metadata.json +1 -0
  37. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -1120,3 +1120,36 @@ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-
1120
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1121
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1122
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1120
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1121
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1122
  model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
1123
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
1124
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1125
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
1126
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_0.distcp filter=lfs diff=lfs merge=lfs -text
1127
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_1.distcp filter=lfs diff=lfs merge=lfs -text
1128
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_0.distcp filter=lfs diff=lfs merge=lfs -text
1129
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_1.distcp filter=lfs diff=lfs merge=lfs -text
1130
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_0.distcp filter=lfs diff=lfs merge=lfs -text
1131
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_1.distcp filter=lfs diff=lfs merge=lfs -text
1132
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_0.distcp filter=lfs diff=lfs merge=lfs -text
1133
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_1.distcp filter=lfs diff=lfs merge=lfs -text
1134
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_0.distcp filter=lfs diff=lfs merge=lfs -text
1135
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_1.distcp filter=lfs diff=lfs merge=lfs -text
1136
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_0.distcp filter=lfs diff=lfs merge=lfs -text
1137
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_1.distcp filter=lfs diff=lfs merge=lfs -text
1138
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1139
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
1140
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1141
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
1142
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1143
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
1144
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1145
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
1146
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1147
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
1148
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1149
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1150
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1151
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
1152
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_0.distcp filter=lfs diff=lfs merge=lfs -text
1153
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp filter=lfs diff=lfs merge=lfs -text
1154
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp filter=lfs diff=lfs merge=lfs -text
1155
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa155c16b9e3eed53c556b2f661ad148f63b10c693bf6c85f78564cbf294756
3
+ size 983254
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c6949cd02f72396d944075a9aa64b8a24f1d02392a3ef5e02beffeaf1fb26f6
3
+ size 297959704
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82aa75797f979e4a27f048937bc248b3546ae4ae384898e450f2a7d7fc19c877
3
+ size 297995696
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc5e006c1cff14ec0e734967bd9866fb821b481d9f94d6b72660f7b6debafc1
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__10_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314fa81fa6ea53955f1e7b6e1ff11066c19ba3a48f72821e5e68f8233084e39f
3
+ size 248783388
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecff52fb9cd2a5609e99b82b01e06b849f504b7a2fd0c5429733d58a7b771cf
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__11_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a12bd99269ef045e78e4fe4a7893f7476761962c4f2b7619313b706a07d41164
3
+ size 248657200
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b28a8cfd9464ac5c95b7be95223a27530787e49874b10b8fb3086eb288b7504
3
+ size 249212896
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__12_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d41cf6a6d88ef8ec185c2f00c51846aee7aa9bd5d93d0d0f9ebff78cd7671d1
3
+ size 249441272
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e2a8b18e9310768fc2dae359867f9a88c79ffdbd27e6a05346f3885275fb8c5
3
+ size 249212896
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__13_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341545108e3a4d5f63becd8195782333324745e1f422202684faeec7f101adb3
3
+ size 249441272
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9357b11e6b876a0aeaa331ee5456c906865791ee4ff86269616055104a01966e
3
+ size 249212896
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__14_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a000c13b41e214075fd0529ad977512435d70678a9dd0c50ea9c980998835781
3
+ size 249441272
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eeb7b17cc44877af9684985a75faaecf46337045aaa7da6ffdedd9ea2818431
3
+ size 249212896
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__15_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74e6f867e6fa441e613f6fde84daea3b53f3fe113e1d7686d3565509746d5dd
3
+ size 249441272
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3a3d723a7cf6efaee0b15d16ac55fccc47708d9c675726b6c7c400446abce92
3
+ size 297523104
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6118ad7c269c088320db2993efd1535651b62fb22bda5b50e6fca36427a5d779
3
+ size 296736020
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48b768f6a7ab6d2f55d9c71a876fddbdc088c4875f37c952c82f9db70c0460a0
3
+ size 249555484
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14f17965c9e07252906d46cbe2ab778a1a2e1400e62d668fe2041dca8dc6e745
3
+ size 249566148
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f6a41a97dbd820f098e449cabf790db97b04e47a6c7be536332f2a48fce1bf5
3
+ size 249081324
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ee0aa8bf99bc18e61ab900f8e00125b3f8b3d98e562a4fa81b227665ce80427
3
+ size 248881288
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eced91c17b664a5ef3031b97a66593cee30e1511f48e57b6c018463d769cddfa
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9beef4dabd4d335dc511269152668326ddb797fb050ed2c01a54fb7d5d5a096e
3
+ size 248781640
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3a4819da5a65a4651b0d1b8847b49c4854fdc737de043ea61c9f8530df5bda7
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7aae8e538bffa73df7b1975c4f9878e21e5182c466d1fdd203afce4486d860
3
+ size 248657200
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a9637de06992a41225a8ca21b101dd5c17c95b6312ade63abf0ff5e4c2b184
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec65d18884bc9e0e58d5e7ded8fe1ef7903a2f1cfae92eb1d09e2b6fbab36b3
3
+ size 248783388
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59991b36d065198401e81ba0c53bf230ad01823d19d1cd82e3665fcfafe58890
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d15beb21e438726b34786153c11b6c4636c1dfe142eda0b318d325e89adaae1
3
+ size 248657200
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0894c4084cc89fe8adcf53fd423222b661980cf5bc50d6b241eda45e59fa45
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__8_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb321429003aaf235e6dd72b76f9569e65e77f7b9d9a2445cd733b23b3a4eb47
3
+ size 248783388
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f7c4b27e670b97983609970a882c141bd33db5d9482d9a6def18dce0db4cbc
3
+ size 249216436
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/__9_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2de9b0e9baf81ddab49bc10e6cc2ce03d3809451a39d0bda6b186206f522bf30
3
+ size 248657200
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbdd3e229dfcd2124aa6ebc588df1ebb260e7407abef721f787d8f40a3c4940c
3
+ size 18140
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-16-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768