IvanHU committed on
Commit
87e4ae4
·
verified ·
1 Parent(s): d733f7e

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +17 -0
  2. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata +3 -0
  3. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp +3 -0
  4. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp +3 -0
  5. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp +3 -0
  6. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp +3 -0
  7. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp +3 -0
  8. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp +3 -0
  9. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp +3 -0
  10. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp +3 -0
  11. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp +3 -0
  12. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp +3 -0
  13. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp +3 -0
  14. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp +3 -0
  15. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp +3 -0
  16. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp +3 -0
  17. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp +3 -0
  18. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp +3 -0
  19. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt +3 -0
  20. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json +1 -0
  21. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt +1 -0
  22. model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt +3 -0
.gitattributes CHANGED
@@ -372,3 +372,20 @@ model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp
372
  model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
373
  model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
374
  model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
373
  model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
374
  model/dev-kvso-dsv3-0.5b-q16-kv2-ep-16-sep-1-top2-cf-0-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-3-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
375
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata filter=lfs diff=lfs merge=lfs -text
376
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
377
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
378
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
379
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
380
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
381
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
382
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
383
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
384
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
385
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
386
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
387
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
388
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
389
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
390
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
391
+ model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06292bb6c5b6965ec5951236896116b0eb36fcfcc94439eed51223195f80b804
3
+ size 1515519
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15479cf724dda9d604fa1920dfe7087a0181b46e0e9bca690a1ba68006072280
3
+ size 509615457
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0d6783c1c960fd481292a62251e27f8140374d70dd2decefebe76eefdbe741
3
+ size 509638215
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3534ed08bee95b5996b1bfcf73705904c679ad07ddcddb45ebd3a6adc7a86b51
3
+ size 509514158
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b22578a8e6c60eee0212a773ba285d21c6af3127f2876ffb741d874037c78de
3
+ size 509545117
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:227a482e53825ee5857ff4bd2e773fc93369c35b43139244ad88a44ed16b5c6c
3
+ size 509488525
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1506a1a33cd04e513131385022f0d24b701aff0a6d43e02cc017ce240ece41c2
3
+ size 509496410
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28794916d68389d6ee25b089d327cc27722de477b8b2f130f7b5cd5a008a39df
3
+ size 509367774
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08ed729d1f58575964714345c16d74c7ddac480b62f42a5efe7b76a25fb72668
3
+ size 509353581
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc495f619c34b9b474d4bd945d1d1b4eda9a40a2baf853ef810dbf2ec55f639
3
+ size 509409739
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e52d9ff378380f38f23fd0e5c9dda028e9966f9ac0c181d5a022f1088bef4fb
3
+ size 509422291
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eccd0018d6528d99f32b9059863b1f1810564668632e3c63d585e30b2308c38d
3
+ size 509274795
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c109cc62a6324a90ea6c948c8e4b4b912009f53c8a16ee3f9fd47fed0a7529c9
3
+ size 509309425
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e7e574ed28748082adf189a89c67e5fa65dcb74a3bb0f58a7c72455b9bb9998
3
+ size 509529558
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28d296439e36d73e3f2b8effb71f5a73118ea89c94430cd17d96bb8d521dbaea
3
+ size 509499531
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4831bb23b0f3112d4a867e2b45a758f7a52203d23ba20fd00eb0422940154e80
3
+ size 509267230
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad93cd809e9d22589289743fc985a4cece4e2d13984609bd15ca9dd404dce810
3
+ size 509295552
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18e981015e7a6a290380b1814cde87ba5638a9de16cd264afe7d8191bdc91384
3
+ size 18663
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/iter_0004768/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 4768
model/dev-mamba-0.5b-q16-kv2-hybrid0.08-bf16-mp2-pp1-lr-2.5e-4-minlr-7e-7-bs-1024-gpus-8-seqlen-8192/linked_runs.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 2025.05.23-02.43.54
2
+ 2025.05.23-02.44.50
3
+ 2025.05.23-13.14.15