diff --git a/.gitattributes b/.gitattributes index f38140ea016c103d1623219bc5c77d75efbfd848..4bf4a1dd01ef1c350aed40bb14bf33c275287a27 100644 --- a/.gitattributes +++ b/.gitattributes @@ -74,3 +74,20 @@ llama32-1b-hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__ll 4000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_gsm8k_2025-12-29T05-42-32.998053.jsonl filter=lfs diff=lfs merge=lfs -text 4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2025-12-29T05-33-51.606131.jsonl filter=lfs diff=lfs merge=lfs -text 4000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__4000_hf/samples_hellaswag_2026-01-07T03-56-26.179191.jsonl filter=lfs diff=lfs merge=lfs -text +2500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2025-12-29T05-42-37.282529.jsonl filter=lfs diff=lfs merge=lfs -text +2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2026-01-07T07-15-20.052939.jsonl filter=lfs diff=lfs merge=lfs -text +2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2026-01-07T03-51-34.430609.jsonl filter=lfs diff=lfs merge=lfs -text +2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2025-12-29T05-34-34.570482.jsonl filter=lfs diff=lfs merge=lfs -text +10000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl filter=lfs diff=lfs merge=lfs -text +10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl filter=lfs diff=lfs merge=lfs -text +12500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +12500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_gsm8k_2026-01-07T07-12-17.705915.jsonl filter=lfs diff=lfs merge=lfs -text +12500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_hellaswag_2026-01-07T03-51-35.016035.jsonl filter=lfs diff=lfs merge=lfs -text +5000_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +5000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_gsm8k_2026-01-07T06-58-05.968321.jsonl filter=lfs diff=lfs merge=lfs -text +5000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_hellaswag_2026-01-07T03-53-49.355990.jsonl filter=lfs diff=lfs merge=lfs -text +7500_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text +7500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_gsm8k_2026-01-07T07-00-15.435455.jsonl filter=lfs diff=lfs merge=lfs -text +7500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_hellaswag_2026-01-07T03-51-34.104451.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl b/10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..99941d4e1a5379b7f2873ae5649b8484653b23b3 --- /dev/null +++ b/10000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_hellaswag_2026-01-07T03-56-14.560763.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019283f25ac52ae31c6cf25385f1c39868a83c840ee685eeaace39a373d64ec1 +size 42645134 diff --git a/10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl b/10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2e3a3ef3f0e1c1844a20d6c02915abe0e6c3028a --- /dev/null +++ b/10000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__10000_hf/samples_gsm8k_2026-01-07T07-13-00.293068.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:979f960ef99474ceb5be3a2b15c727db71dd0ddd0d62c82e73abff3a58077fc7 +size 16725724 diff --git a/10000_hf/model.safetensors b/10000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25bb9f36d6b781f4e45528fead09205cc13dc33f --- /dev/null +++ b/10000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4f056bd3db4b0663bcc79008263ed8fc6ab3b2061f578ae12e6361cd47e3b0 +size 2471645608 diff --git a/10000_hf/tokenizer.json b/10000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/10000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt b/12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..5647209edf9921f853dd7c96654442ba9a569024 --- /dev/null +++ b/12500/lr_scheduler/lr_scheduler_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374278ee7760e355e5f7d19924fad3e4fca7d2c8c0e8ce04f9497d4c9091bc23 +size 3781 diff --git a/12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d04bd0e387ea995c0e166ab3c6f87a3ec933a546 --- /dev/null +++ b/12500/model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f64f9580bc254ac7da5e2a4f85508ee532ba1941a624e1765bc40b32c3b936 +size 8388848 diff --git a/12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afb9dcc85681ed986f313a79d3249b643ccaa718 --- /dev/null +++ b/12500/model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755297f607e1208065ec25c006290724f27b2a048deb4f2b0447ebb6a97b64a3 +size 12583280 diff --git a/12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03844937616f5ce129c2593fa4a04f47e9ea70ca --- /dev/null +++ b/12500/model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9320d79d9ad3cb9751098131b608a50be6ccd2e6c22d3311f38c3ec6c4adde +size 4192 diff --git a/12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cebda5ec40be785e32d8273a4e210818de526116 --- /dev/null +++ b/12500/model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768b30c5809543dcb7e2b3a80acebcccc2386d5951858ce96f659472c87edbbd +size 33554672 diff --git a/12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e81658f19f88c6f21292bf3e3397f09457ba6e44 --- /dev/null +++ b/12500/model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d475b067243674f9391064be95d0b62095dd499de017a4cf54e4ced3a80a4967 +size 67109176 diff --git a/12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87b3f07935b1e09ae7930ace5ffc89eecc7aa2a2 --- /dev/null +++ b/12500/model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286b94d6f2348d14553975a37a13790f35b17394a1319ab681d8e2a53d3b4e32 +size 4192 diff --git a/12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9fe22a903f72a01791032aad4febe450da64409 --- /dev/null +++ b/12500/model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a182a893a8659c7ac62516949ac6ec65ca8d8b18b981b45c7591308afe7121 +size 8388848 diff --git a/12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a2fa31a9f1b981bd0939e40272f139210827b45 --- /dev/null +++ b/12500/model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69fc59684b34ca689e9369bce889aad4d0a60bc151ff8a8cabf010986280a13a +size 12583280 diff --git a/12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37495c04e0b6ff0ad1706966a503e737cd158327 --- /dev/null +++ b/12500/model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01eb90d8439f20c3259d5b2f6b87ac7d30931a2a8a0ac4ea2f066d0544c19e40 +size 4192 diff --git a/12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..124437e4d4d42b8f5465932a1b1937732a71dcd1 --- /dev/null +++ b/12500/model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be64a1076bdfd9651c17ca8a05384ce161af29542848c980bf9ef32fa422058d +size 33554672 diff --git a/12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac3f460835cfb14fb429834b1d7374e49318bd5f --- /dev/null +++ b/12500/model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27ba8d73e7115d6ae7d8a05bd6201dc5c975d068d207f14cd95f52d619758a2 +size 67109176 diff --git a/12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98aa037d5f57c1a330853526faca493f715394aa --- /dev/null +++ b/12500/model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380913079b683588e3514ec46b60d2bd75699a26230effec7a24ac405831b63d +size 4192 diff --git a/12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e8d3917bf202ff81b8e928a79383b58712682c2 --- /dev/null +++ b/12500/model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932cd453fe7d9cb054ecf9dfe50dc39c4e7fac374e847152601dff3896b382c3 +size 8388848 diff --git a/12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5536c291877c9a01a6f2180448d632c738297f34 --- /dev/null +++ b/12500/model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bc299ecea6517951175b8fa6fdcc1811ff224448f8658645e51133b175ee10 +size 12583280 diff --git a/12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de4f4885af57c9c16764d099813016df5f890f33 --- /dev/null +++ b/12500/model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f40df8e3ee1734d75dc14a0708ce9f4f47a01635c96f49fac9e8b6c6142e9a1f +size 4192 diff --git a/12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..691d2e832158e71db533d2f0ca44879d137e38c6 --- /dev/null +++ b/12500/model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2547623d0a135fc578cf37237581e8c40a9035a80ac3cf47c3567c17fa35194 +size 33554672 diff --git a/12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2161ec0b7ac8533fd5e29e1fcb8e0c5ef1bd6916 --- /dev/null +++ b/12500/model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fadd4d4606f22244fc7141671bc4e41ee2a39fed5bb59dba76988a95d85f355 +size 67109176 diff --git a/12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7e1fde475322cff5ca4d166d64f9cce151c9e7d --- /dev/null +++ b/12500/model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a8ea473c8821bfbb047c94ee32b7fc508774b0f035130b1a53929866c29526 +size 4192 diff --git a/12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff679908fd31b3e031aa7f688a6294cfa35d70e6 --- /dev/null +++ b/12500/model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f82ecb4e76af4b6984f542f1c2d06a922544b1d2c649a5c456c8eb3010a44ee +size 8388848 diff --git a/12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37609162c8bea050a47de267d5cb941c4c4d1e44 --- /dev/null +++ b/12500/model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca1919e6460967b14a09695b74b10b4aad5574ab54a8702ac67de8f4f6b6529e +size 12583280 diff --git a/12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c7e386b62973bfaa7d6c08cee2d027762a9e4c7 --- /dev/null +++ b/12500/model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13254580fbbaeeccd0abe755649ee68566304272e594be3d5d8dfc7385d58eeb +size 4192 diff --git a/12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..261f2909bdfe5702dc51ea0d9d03375985abfa34 --- /dev/null +++ b/12500/model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5deea6376637f31c271ce8f09ed5064aa781d3fd3caad7d7f8f27d0f21e54320 +size 33554672 diff --git a/12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..119b532250b8123a151dac5075edd22de6ebda68 --- /dev/null +++ b/12500/model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80e832585909c699d214ac6c9ddbce83dae96cbc10f81a2cb0055aac7d62828 +size 67109176 diff --git a/12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4943692b91081af4f36aee35f4529edf607d2167 --- /dev/null +++ b/12500/model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3101d7f62ad0477c4d4bdb8220bc62c95e80b309367fb39b1a189a392cebc4a4 +size 4192 diff --git a/12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7e7e39707349ee7d9d542c89c91954cb53a44ab --- /dev/null +++ b/12500/model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1e6f9470546f244298688febfacdcc6fba40ab32e6f5cc58d9f7f85cbbc999 +size 4192 diff --git a/12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2211def76f6ecf0835a053a3a53db113e270646e --- /dev/null +++ b/12500/model/model/decoder/4/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b9798d69b2bad5743aab3eb5f6697449ed6c05ee622d4ba5dd9e306ac2ad947 +size 8388848 diff --git a/12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5873e45a741bf06d4c272981f373b03b70a0985 --- /dev/null +++ b/12500/model/model/decoder/4/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24104016d3c6d8552f9359d27350ed190bdb33b5bf666e362f50a9fe8f100bfb +size 12583280 diff --git a/12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d361e12ab6bb1d749a2d8ce103a7f18c13019d6 --- /dev/null +++ b/12500/model/model/decoder/4/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4542ddadef586703537eddfac6766f609e45cf2195a97647dc29bc3acff1a2 +size 4192 diff --git a/12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea0d3b336965c8167d663ee7ab4acf237f5b1ea5 --- /dev/null +++ b/12500/model/model/decoder/4/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f40041638bf95fd0c6acc4fc6c617c35952560fd3b3d6f8bb91c005eab9c031 +size 33554672 diff --git a/12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71df9b01b85721d7ab48444f697dc68af8ba0ab9 --- /dev/null +++ b/12500/model/model/decoder/4/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957438630a769c12814f4973d984c8e9fe80b04fcf2d7dc7561bba204f19da40 +size 67109176 diff --git a/12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e439004dae19f7653bf5f2a871fdef3c7a98663 --- /dev/null +++ b/12500/model/model/decoder/4/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a57aa27c2d8e5b206d4d2e995013ddab23daaaf736f762bbc4c9450d20bd19a +size 4192 diff --git a/12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e1d342ebc56498c6e1291f27b95affb82ff8448 --- /dev/null +++ b/12500/model/model/decoder/5/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888b959cf5a5b1f8bb3db748bdcb86a3e72d279de6ab84abf19e8f20252d3de5 +size 8388848 diff --git a/12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d72bae8580b1a806d7af870c88b87425a05ba0bd --- /dev/null +++ b/12500/model/model/decoder/5/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232b95e7c6429c17708b9fae7f8ce3e2d91ee9ce811710459062d2866f921dc4 +size 12583280 diff --git a/12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors b/12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abf8689a6077332d34e9a4e1f6fc3377ebd6ec59 --- /dev/null +++ b/12500/model/model/decoder/5/pp_block/input_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9095db1de1d00abdf92a45573dbd0f2501a12f075124f4b8c934e3ce4dc4b5e1 +size 4192 diff --git a/12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..336b80baa78b0a6549c5153878d946fb0d717506 --- /dev/null +++ b/12500/model/model/decoder/5/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635d465cae92399da192b18274bbeb7b88b214e22cc4b87325904e7c68010ff1 +size 33554672 diff --git a/12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors b/12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92ed7f1f2a76b613703b3e2de87b7b08c2279835 --- /dev/null +++ b/12500/model/model/decoder/5/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f13cf5aeadc3783aa55e8c9e9396d1bdf79a1074ea9d0535c13ef41c961658 +size 67109176 diff --git a/12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors b/12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d17b28a8c59079ed39fe6cd8cd8cd216a4bf2582 --- /dev/null +++ b/12500/model/model/decoder/5/pp_block/post_attention_layernorm/model_weight.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2c1a4139db805a97468a1324b7be8ffc6cb7a4bb3352a596349d2b812ab53a +size 4192 diff --git a/12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt b/12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbb9445f168c270277a97ea94fc7b2e80f107343 --- /dev/null +++ b/12500/optimizer/optimizer_pp-0-of-1_tp-0-of-1_exp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f2317e040a045732db10e89cf99fa3404d9528f51eb267e7409e484e749b92 +size 14829896970 diff --git a/12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..15bb9782bdd1530ae232fc73387a84decc9195ee --- /dev/null +++ b/12500/random/tp-0-of-1_dp-0-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601ec41ec35ebf4d71aa340e9ad8eec6d97496febf0c82d4b906408c13f4cb3 +size 1521 diff --git a/12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9fd73c1278e11ea0cac6f58ca0de36910f9b70d --- /dev/null +++ b/12500/random/tp-0-of-1_dp-1-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631c9eaddef421b77417ad6de8caa9b73ce2bcba3028ae643ece004e415842b3 +size 1521 diff --git a/12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0373c058254c042a3252e06381109f6a90a3d1ae --- /dev/null +++ b/12500/random/tp-0-of-1_dp-2-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9154057cc18766552e3716cb2ea909fe597fbf9bf5bbba55f53e4392660fd471 +size 1521 diff --git a/12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..7439ec735c9cd78d6ca4965a30820eeaad5b546d --- /dev/null +++ b/12500/random/tp-0-of-1_dp-3-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a75e874575cdf39fe4d6ab9b0eadf99af310b441c93ec648e504f354a32782 +size 1521 diff --git a/12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6feb2bf6a2b27e8013b251ce97e802479b35a33 --- /dev/null +++ b/12500/random/tp-0-of-1_dp-4-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3a3b3b43fbfa9f6f526bfd0438f1beb8312f8bb70ed1916db7b4ec6db049a8 +size 1521 diff --git a/12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..5353410c2720a6537ce7258058ecae9456b63ce2 --- /dev/null +++ b/12500/random/tp-0-of-1_dp-5-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e00f9afebb28b3eebef1362210f10067a02cd411854be30e8d0a459122294ff +size 1521 diff --git a/12500/random/tp-0-of-1_dp-6-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-6-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..383d9e71229d622ee8c8ac18c15e95d4a4a40e77 --- /dev/null +++ b/12500/random/tp-0-of-1_dp-6-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee8d18e682345ace533c1edc313e2e0abe82d4b60dba80152bb4d53e4f4e753 +size 1521 diff --git a/12500/random/tp-0-of-1_dp-7-of-8_pp-0-of-1.pt b/12500/random/tp-0-of-1_dp-7-of-8_pp-0-of-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..563a558fd1bfb79f3ae69dd9ca9e23cc780317ee --- /dev/null +++ b/12500/random/tp-0-of-1_dp-7-of-8_pp-0-of-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8151d92acfbdd2ef1aff82693896ddcb2ffc4b1b18ef05fb15f2dfc55b883250 +size 1521 diff --git a/12500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_hellaswag_2026-01-07T03-51-35.016035.jsonl b/12500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_hellaswag_2026-01-07T03-51-35.016035.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e398511405bca31af04e1beaa07e02f7aaa58ccc --- /dev/null +++ b/12500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_hellaswag_2026-01-07T03-51-35.016035.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e88b434464d264983d46811848835622f0bf77b41c0fe8db74903d5b7c9fc3 +size 42645324 diff --git a/12500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_gsm8k_2026-01-07T07-12-17.705915.jsonl b/12500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_gsm8k_2026-01-07T07-12-17.705915.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d9808d3584bfca521d2b7c88751e1edb1ea9d638 --- /dev/null +++ b/12500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__12500_hf/samples_gsm8k_2026-01-07T07-12-17.705915.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94cf7ea2bdf5856f6519081bab85948697f36de9c05b0e4f8f18d61503e7cece +size 16769501 diff --git a/12500_hf/model.safetensors b/12500_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d7088d69af061874275681999af0bc4288c7634 --- /dev/null +++ b/12500_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f622dca917b44f638938c5dbd4c778f0fea70e6f6a1a9bbecd411613728c6e +size 2471645608 diff --git a/12500_hf/tokenizer.json b/12500_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/12500_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2025-12-29T05-34-34.570482.jsonl b/2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2025-12-29T05-34-34.570482.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..03df91f7a08be3c959a7398b510617f69e946e0f --- /dev/null +++ b/2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2025-12-29T05-34-34.570482.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebd80cdba08d784fe951602282d5f79c84bde9280e821a54d817305a75d8ef8 +size 42643896 diff --git a/2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2026-01-07T03-51-34.430609.jsonl b/2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2026-01-07T03-51-34.430609.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..03df91f7a08be3c959a7398b510617f69e946e0f --- /dev/null +++ b/2500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_hellaswag_2026-01-07T03-51-34.430609.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebd80cdba08d784fe951602282d5f79c84bde9280e821a54d817305a75d8ef8 +size 42643896 diff --git a/2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2025-12-29T05-42-37.282529.jsonl b/2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2025-12-29T05-42-37.282529.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..313eae0a5c7b2ce5565605b6c81860d3d151755b --- /dev/null +++ b/2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2025-12-29T05-42-37.282529.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b704f2e643956447d5959dd8bd02a236c70949f3082c0e9540a7e15452722117 +size 16776776 diff --git a/2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2026-01-07T07-15-20.052939.jsonl b/2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2026-01-07T07-15-20.052939.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..313eae0a5c7b2ce5565605b6c81860d3d151755b --- /dev/null +++ b/2500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__2500_hf/samples_gsm8k_2026-01-07T07-15-20.052939.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b704f2e643956447d5959dd8bd02a236c70949f3082c0e9540a7e15452722117 +size 16776776 diff --git a/2500_hf/model.safetensors b/2500_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..197d76e2e593e60fee8d75414945a4bac8de5f17 --- /dev/null +++ b/2500_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb19733f1e9aee21113269f821b2fd759a2767a8968a5a0a108d056fdcc54df +size 2471645608 diff --git a/2500_hf/tokenizer.json b/2500_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/2500_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/5000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_hellaswag_2026-01-07T03-53-49.355990.jsonl b/5000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_hellaswag_2026-01-07T03-53-49.355990.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..15729cc1868b607476eb4a79c1652597a92dca80 --- /dev/null +++ b/5000_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_hellaswag_2026-01-07T03-53-49.355990.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6120efbf55a184f73910a7f24467ff6c616fc5dff73d66b92e126bb05fa0b35 +size 42644444 diff --git a/5000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_gsm8k_2026-01-07T06-58-05.968321.jsonl b/5000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_gsm8k_2026-01-07T06-58-05.968321.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..30fa6662727e0344ebd86d7192517546223d7aff --- /dev/null +++ b/5000_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__5000_hf/samples_gsm8k_2026-01-07T06-58-05.968321.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3bd4d6afdbb207d8cc365427601fc41c8399d525b33e5d6032c94d4028c5a8 +size 16800458 diff --git a/5000_hf/model.safetensors b/5000_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f742331766fa8c1f6ce774feb128d819068f7cb4 --- /dev/null +++ b/5000_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac60c304da7b3998646c03845818001165a317b1740916953bba9ed68c6f422 +size 2471645608 diff --git a/5000_hf/tokenizer.json b/5000_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/5000_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/7500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_hellaswag_2026-01-07T03-51-34.104451.jsonl b/7500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_hellaswag_2026-01-07T03-51-34.104451.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4c7c61a97b4e3fc3320ceb53687771071404dcac --- /dev/null +++ b/7500_hf/harness_eval_0shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_hellaswag_2026-01-07T03-51-34.104451.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229906941384e79d2f01cb582969d9a0675cdb3ee7e738b9367dda4ce8bce18c +size 42644720 diff --git a/7500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_gsm8k_2026-01-07T07-00-15.435455.jsonl b/7500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_gsm8k_2026-01-07T07-00-15.435455.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..71a717c9939436907d5a5c8a87825a32034ae626 --- /dev/null +++ b/7500_hf/harness_eval_8shot/__home__aiops__zhuty__nanotron__checkpoints__7500_hf/samples_gsm8k_2026-01-07T07-00-15.435455.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b12ceb6cba2121c62b1b4db784e62fd389d9f21793c0f4d9cd6326d6e33a95d +size 16769968 diff --git a/7500_hf/model.safetensors b/7500_hf/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92ebb07df1bb1cb15cc714a3e56a65bd43a9313b --- /dev/null +++ b/7500_hf/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2499f14923b559337885c6aaf4537b4f18cf92239746de5191b54f8cdfa0a3e0 +size 2471645608 diff --git a/7500_hf/tokenizer.json b/7500_hf/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/7500_hf/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920