IvanHU committed on
Commit
36cc7cc
·
verified ·
1 Parent(s): 2dd1d8e

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +17 -0
  2. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -1103,3 +1103,20 @@ model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp
1103
  model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1104
  model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1105
  model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1103
  model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1104
  model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1105
  model/dev-tssb-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
1106
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
1107
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1108
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
1109
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1110
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
1111
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1112
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
1113
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1114
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
1115
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1116
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
1117
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1118
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
1119
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1120
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
1121
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1122
+ model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6941ec2b99bfe924ffe77dc439306d22ca46b55c482cf66e6b6ece53275fef
3
+ size 926499
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:738a13cae0bf7d9f7d560148780bd0571ba5fafd36f63feb2caf1d78d769cf40
3
+ size 545101300
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53642761d80197c17547ef97f597b9ec307371c3c95ce0027c30a14d90f35725
3
+ size 545132060
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d1aa8111cf14a7237c053ea9fd68b038f9fa04ada2d95999a0a7f89633069f
3
+ size 499379568
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0711a5bf3452cb8e520236c3b09dd1b81a95e4f31289add5b1ba4e3d3fa7f538
3
+ size 498618012
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36419b7a11ce5faf278e9deafb2c08ecf7821533e37b72488404cdcc5bfcc4b6
3
+ size 498643736
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d2cce2f102d3e4cd96dfef9af395238bb40b35c04ee016f500cb2467be53c3
3
+ size 499459932
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a7b474e53bdba2a2ffe4bfcc5d589a2243d38583bd722bf9dfc76960b235f6
3
+ size 498478892
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fecc7191fc0c09d419d8782e6252e380925772a8d702512b074be23a853f8e46
3
+ size 499459932
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c292bd1ce95c3fd794d50953c5eb40bd5e4f48c5fd4e6ea0f21082c19bac6f
3
+ size 498591332
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d710e4ae8bbd84bf1872ec8ad901c2a42e33fe93134ed91c815171bf3df9d503
3
+ size 499403292
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:583446f66cb98c2c34f7b30688e3d4fb426482bc696019be3eedd022cbb37b39
3
+ size 544206316
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbdd77596c0d1580601993a4bec4d87a689ab7513b98b0624f08067c27096e68
3
+ size 544049468
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50e283b89b0bed9eb0b16151a8cf1fca8a9d55f62d74e8004895f37c7a49b171
3
+ size 498591332
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3614d295b4315173d60b27ed1d6cd069df4906ac245578db4bbb1faaba947860
3
+ size 499403292
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95deb6b064fd5e5ff6f0a0bfb9f1da6d175e72d1d684b2c6269b90046a83b0de
3
+ size 498422252
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0eccd266da5874e206a32ce7811f848d1d3b29662e9f9c1dff0eee02d3ce27b8
3
+ size 498353536
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0862063be1d4c0106ecfa0f72df8f31bdd7ca7e9223de1e4e6fd6656c20c9ffd
3
+ size 18140
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dsv3-0.5b-q16-kv2-ep-16-sep--top2-cf-0.0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus--seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768