tyzhu commited on
Commit
194dc22
·
verified ·
1 Parent(s): 0fc2dcc

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +17 -0
  2. 10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl +3 -0
  3. 10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl +3 -0
  4. 10000_hf/model.safetensors +3 -0
  5. 10000_hf/tokenizer.json +3 -0
  6. 12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt +3 -0
  7. 12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  8. 12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  9. 12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
  10. 12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  11. 12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  12. 12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  13. 12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  14. 12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  15. 12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
  16. 12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  17. 12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  18. 12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  19. 12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  20. 12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  21. 12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors +3 -0
  22. 12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  23. 12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  24. 12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  25. 12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  26. 12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  27. 12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors +3 -0
  28. 12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  29. 12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  30. 12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  31. 12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  32. 12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  33. 12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  34. 12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors +3 -0
  35. 12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  36. 12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  37. 12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  38. 12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  39. 12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  40. 12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors +3 -0
  41. 12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  42. 12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
  43. 12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
  44. 12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt +3 -0
  45. 12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt +3 -0
  46. 12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt +3 -0
  47. 12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt +3 -0
  48. 12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt +3 -0
  49. 12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt +3 -0
  50. 12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt +3 -0
.gitattributes CHANGED
@@ -74,3 +74,20 @@ llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__ll
74
  4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text
75
  4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text
76
  4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text
75
  4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text
76
  4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text
77
+ 2500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
78
+ 2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2025-12-29T05-42-37.282529.jsonl filter=lfs diff=lfs merge=lfs -text
79
+ 2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2026-01-07T07-15-20.052939.jsonl filter=lfs diff=lfs merge=lfs -text
80
+ 2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2026-01-07T03-51-34.430609.jsonl filter=lfs diff=lfs merge=lfs -text
81
+ 2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2025-12-29T05-34-34.570482.jsonl filter=lfs diff=lfs merge=lfs -text
82
+ 10000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
83
+ 10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl filter=lfs diff=lfs merge=lfs -text
84
+ 10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl filter=lfs diff=lfs merge=lfs -text
85
+ 12500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
86
+ 12500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_gsm8k_2026-01-07T07-12-17.705915.jsonl filter=lfs diff=lfs merge=lfs -text
87
+ 12500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_hellaswag_2026-01-07T03-51-35.016035.jsonl filter=lfs diff=lfs merge=lfs -text
88
+ 5000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
89
+ 5000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_gsm8k_2026-01-07T06-58-05.968321.jsonl filter=lfs diff=lfs merge=lfs -text
90
+ 5000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_hellaswag_2026-01-07T03-53-49.355990.jsonl filter=lfs diff=lfs merge=lfs -text
91
+ 7500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
92
+ 7500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_gsm8k_2026-01-07T07-00-15.435455.jsonl filter=lfs diff=lfs merge=lfs -text
93
+ 7500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_hellaswag_2026-01-07T03-51-34.104451.jsonl filter=lfs diff=lfs merge=lfs -text
10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019283f25ac52ae31c6cf25385f1c39868a83c840ee685eeaace39a373d64ec1
3
+ size 42645134
10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:979f960ef99474ceb5be3a2b15c727db71dd0ddd0d62c82e73abff3a58077fc7
3
+ size 16725724
10000_hf/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c4f056bd3db4b0663bcc79008263ed8fc6ab3b2061f578ae12e6361cd47e3b0
3
+ size 2471645608
10000_hf/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:374278ee7760e355e5f7d19924fad3e4fca7d2c8c0e8ce04f9497d4c9091bc23
3
+ size 3781
12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f64f9580bc254ac7da5e2a4f85508ee532ba1941a624e1765bc40b32c3b936
3
+ size 8388848
12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:755297f607e1208065ec25c006290724f27b2a048deb4f2b0447ebb6a97b64a3
3
+ size 12583280
12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9320d79d9ad3cb9751098131b608a50be6ccd2e6c22d3311f38c3ec6c4adde
3
+ size 4192
12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:768b30c5809543dcb7e2b3a80acebcccc2386d5951858ce96f659472c87edbbd
3
+ size 33554672
12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d475b067243674f9391064be95d0b62095dd499de017a4cf54e4ced3a80a4967
3
+ size 67109176
12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286b94d6f2348d14553975a37a13790f35b17394a1319ab681d8e2a53d3b4e32
3
+ size 4192
12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a182a893a8659c7ac62516949ac6ec65ca8d8b18b981b45c7591308afe7121
3
+ size 8388848
12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69fc59684b34ca689e9369bce889aad4d0a60bc151ff8a8cabf010986280a13a
3
+ size 12583280
12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01eb90d8439f20c3259d5b2f6b87ac7d30931a2a8a0ac4ea2f066d0544c19e40
3
+ size 4192
12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be64a1076bdfd9651c17ca8a05384ce161af29542848c980bf9ef32fa422058d
3
+ size 33554672
12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e27ba8d73e7115d6ae7d8a05bd6201dc5c975d068d207f14cd95f52d619758a2
3
+ size 67109176
12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:380913079b683588e3514ec46b60d2bd75699a26230effec7a24ac405831b63d
3
+ size 4192
12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932cd453fe7d9cb054ecf9dfe50dc39c4e7fac374e847152601dff3896b382c3
3
+ size 8388848
12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5bc299ecea6517951175b8fa6fdcc1811ff224448f8658645e51133b175ee10
3
+ size 12583280
12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f40df8e3ee1734d75dc14a0708ce9f4f47a01635c96f49fac9e8b6c6142e9a1f
3
+ size 4192
12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2547623d0a135fc578cf37237581e8c40a9035a80ac3cf47c3567c17fa35194
3
+ size 33554672
12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fadd4d4606f22244fc7141671bc4e41ee2a39fed5bb59dba76988a95d85f355
3
+ size 67109176
12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a8ea473c8821bfbb047c94ee32b7fc508774b0f035130b1a53929866c29526
3
+ size 4192
12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f82ecb4e76af4b6984f542f1c2d06a922544b1d2c649a5c456c8eb3010a44ee
3
+ size 8388848
12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca1919e6460967b14a09695b74b10b4aad5574ab54a8702ac67de8f4f6b6529e
3
+ size 12583280
12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13254580fbbaeeccd0abe755649ee68566304272e594be3d5d8dfc7385d58eeb
3
+ size 4192
12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5deea6376637f31c271ce8f09ed5064aa781d3fd3caad7d7f8f27d0f21e54320
3
+ size 33554672
12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b80e832585909c699d214ac6c9ddbce83dae96cbc10f81a2cb0055aac7d62828
3
+ size 67109176
12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3101d7f62ad0477c4d4bdb8220bc62c95e80b309367fb39b1a189a392cebc4a4
3
+ size 4192
12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1e6f9470546f244298688febfacdcc6fba40ab32e6f5cc58d9f7f85cbbc999
3
+ size 4192
12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b9798d69b2bad5743aab3eb5f6697449ed6c05ee622d4ba5dd9e306ac2ad947
3
+ size 8388848
12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24104016d3c6d8552f9359d27350ed190bdb33b5bf666e362f50a9fe8f100bfb
3
+ size 12583280
12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d4542ddadef586703537eddfac6766f609e45cf2195a97647dc29bc3acff1a2
3
+ size 4192
12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f40041638bf95fd0c6acc4fc6c617c35952560fd3b3d6f8bb91c005eab9c031
3
+ size 33554672
12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957438630a769c12814f4973d984c8e9fe80b04fcf2d7dc7561bba204f19da40
3
+ size 67109176
12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a57aa27c2d8e5b206d4d2e995013ddab23daaaf736f762bbc4c9450d20bd19a
3
+ size 4192
12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:888b959cf5a5b1f8bb3db748bdcb86a3e72d279de6ab84abf19e8f20252d3de5
3
+ size 8388848
12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:232b95e7c6429c17708b9fae7f8ce3e2d91ee9ce811710459062d2866f921dc4
3
+ size 12583280
12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9095db1de1d00abdf92a45573dbd0f2501a12f075124f4b8c934e3ce4dc4b5e1
3
+ size 4192
12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:635d465cae92399da192b18274bbeb7b88b214e22cc4b87325904e7c68010ff1
3
+ size 33554672
12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f13cf5aeadc3783aa55e8c9e9396d1bdf79a1074ea9d0535c13ef41c961658
3
+ size 67109176
12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f2c1a4139db805a97468a1324b7be8ffc6cb7a4bb3352a596349d2b812ab53a
3
+ size 4192
12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0f2317e040a045732db10e89cf99fa3404d9528f51eb267e7409e484e749b92
3
+ size 14829896970
12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0601ec41ec35ebf4d71aa340e9ad8eec6d97496febf0c82d4b906408c13f4cb3
3
+ size 1521
12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631c9eaddef421b77417ad6de8caa9b73ce2bcba3028ae643ece004e415842b3
3
+ size 1521
12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9154057cc18766552e3716cb2ea909fe597fbf9bf5bbba55f53e4392660fd471
3
+ size 1521
12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a75e874575cdf39fe4d6ab9b0eadf99af310b441c93ec648e504f354a32782
3
+ size 1521
12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3a3b3b43fbfa9f6f526bfd0438f1beb8312f8bb70ed1916db7b4ec6db049a8
3
+ size 1521
12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e00f9afebb28b3eebef1362210f10067a02cd411854be30e8d0a459122294ff
3
+ size 1521