diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..66812e2266e350de4333cd384d19ab91e2cae4b4 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +--- +base_model: [] +library_name: transformers +tags: +- mergekit +- merge + +--- +# Test1 + +This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit). + +## Merge Details +### Merge Method + +This model was merged using the NuSLERP merge method. + +### Models Merged + +The following models were included in the merge: +* /workspace/cache/models--TheDrummer--Fallen-Command-A-111B-v1/snapshots/5d2b4bdb35d7dff3a4eb51a5f2b231ba27943491 +* /workspace/cache/models--CohereForAI--c4ai-command-a-03-2025/snapshots/6894b671d755c72573bb1a5722cfcfcd86b42b01 + +### Configuration + +The following YAML configuration was used to produce this model: + +```yaml +dtype: bfloat16 +merge_method: nuslerp +modules: + default: + slices: + - sources: + - layer_range: [0, 64] + model: /workspace/cache/models--CohereForAI--c4ai-command-a-03-2025/snapshots/6894b671d755c72573bb1a5722cfcfcd86b42b01 + parameters: + weight: [0.85, 0.8, 0.9, 0.95, 0.9, 0.8, 0.85] + - layer_range: [0, 64] + model: /workspace/cache/models--TheDrummer--Fallen-Command-A-111B-v1/snapshots/5d2b4bdb35d7dff3a4eb51a5f2b231ba27943491 + parameters: + weight: [0.15, 0.2, 0.1, 0.05, 0.1, 0.2, 0.15] +``` diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e024a824076f211376a60a40f97b8ce5cd1a0fd4 --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "architectures": [ + "Cohere2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "cache_implementation": "hybrid", + "eos_token_id": 255001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 12288, + "initializer_range": 0.02, + "intermediate_size": 36864, + "layer_norm_eps": 1e-05, + "logit_scale": 0.25, + "max_position_embeddings": 262144, + "model_type": "cohere2", + "num_attention_heads": 96, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "order_of_interleaved_layers": "local_attn_first", + "pad_token_id": 0, + "position_embedding_type": "rope_gptj", + "rope_scaling": null, + "rope_theta": 50000, + "rotary_pct": 1.0, + "sliding_window": 4096, + "sliding_window_pattern": 4, + "torch_dtype": "bfloat16", + "transformers_version": "4.50.3", + "unsloth_fixed": true, + "unsloth_version": "2025.3.18", + "use_cache": true, + "use_embedding_sharing": true, + "use_gated_activation": true, + "use_parallel_block": true, + "use_parallel_embedding": true, + "vocab_size": 256000 +} diff --git a/mergekit_config.yml b/mergekit_config.yml new file mode 100644 index 0000000000000000000000000000000000000000..1f7cb3f70beff086f61d487848cfbc5f6f8fbf39 --- /dev/null +++ b/mergekit_config.yml @@ -0,0 +1,14 @@ +dtype: bfloat16 +merge_method: nuslerp +modules: + default: + slices: + - sources: + - layer_range: [0, 64] + model: /workspace/cache/models--CohereForAI--c4ai-command-a-03-2025/snapshots/6894b671d755c72573bb1a5722cfcfcd86b42b01 + parameters: + weight: [0.85, 0.8, 0.9, 0.95, 0.9, 0.8, 0.85] + - layer_range: [0, 64] + model: /workspace/cache/models--TheDrummer--Fallen-Command-A-111B-v1/snapshots/5d2b4bdb35d7dff3a4eb51a5f2b231ba27943491 + parameters: + weight: [0.15, 0.2, 0.1, 0.05, 0.1, 0.2, 0.15] \ No newline at end of file diff --git a/model-00001-of-00049.safetensors b/model-00001-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dea47f1b0324b64f54c6d6dc3ee9d71cd98fc0a2 --- /dev/null +++ b/model-00001-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b0580b297718a42483245c285bf83479c0f8babffa1e05db0931104243da38 +size 6291456144 diff --git a/model-00002-of-00049.safetensors b/model-00002-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11ca1d859ce4f3d89d5300e8e25bf0c9371911b5 --- /dev/null +++ b/model-00002-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4a943ff834f9318c3041d8329bb288fedb16c1169c8f5795b7fa296c0e9f09 +size 4278240416 diff --git a/model-00003-of-00049.safetensors b/model-00003-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af080c787fc3c7e966d67ab93ed0f1c0d4f0c31a --- /dev/null +++ b/model-00003-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff152143dc8721e37da5a24d0b3a0070cb3aea12c738875a141e5a271720969 +size 4278215736 diff --git a/model-00004-of-00049.safetensors b/model-00004-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dad2acb60132adf7b1b27c99a801ab88263a1627 --- /dev/null +++ b/model-00004-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ced3598456cddd55f89eacd600d97420fa58beba9ebc47e592635100e68d63 +size 4932552336 diff --git a/model-00005-of-00049.safetensors b/model-00005-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5df8464c9d6f5262f0749e1d9fa7544a6c100319 --- /dev/null +++ b/model-00005-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49845a3008cade80780e10dda14f4bb8a79fe9bce3eb70f2da90d664ca6cfd8e +size 4278215744 diff --git a/model-00006-of-00049.safetensors b/model-00006-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da4ce2d04797acd5e6fefa7acd8a9a8328ffba0e --- /dev/null +++ b/model-00006-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e81c8720678d94c90795bf6adeb46e58334e1fff6020642d6ae7be14deef1f7 +size 4278215744 diff --git a/model-00007-of-00049.safetensors b/model-00007-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d813b8553703d8341905217d68ab7bf1281f04d --- /dev/null +++ b/model-00007-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8abc550bf04907b7b5b4abbbd87181f9e275ecc0f12d34636cee04f34685821 +size 4932552336 diff --git a/model-00008-of-00049.safetensors b/model-00008-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..061c7b1c41d5da5b92ae9c9496e2040383a54f05 --- /dev/null +++ b/model-00008-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa0035bf27b69819f0c9a603cf4ad7259c3c4a1252cbd3031d0ea40419cf132 +size 4278215744 diff --git a/model-00009-of-00049.safetensors b/model-00009-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c1420849d0ef74054857d37e78d95303154a62c --- /dev/null +++ b/model-00009-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1ab2c97ec3dfe6721e019642455b74c9474d4f52c2f60d120d6b8b3eaabc8ac +size 4278215744 diff --git a/model-00010-of-00049.safetensors b/model-00010-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfd32858a600bc6c578c78fa749c330743863a56 --- /dev/null +++ b/model-00010-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0234e5aaa7f1051ac254577014a5f5d1194549964498ed8a333e73b7d486b840 +size 4932552328 diff --git a/model-00011-of-00049.safetensors b/model-00011-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92201c276bc82e8b8f66cb87675a13c7a14be4c4 --- /dev/null +++ b/model-00011-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230014326a7b939ad4482aa29ba1fddf02c60f6a3cd014a523471b27daa296d8 +size 4278215736 diff --git a/model-00012-of-00049.safetensors b/model-00012-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..357f4ab37729fd8c0bd6e399e560a9e762eb61d1 --- /dev/null +++ b/model-00012-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e027c3ffd0907f0b271344921962c1a1dc22db5e26dba2e15c2479b6a238e29 +size 4278215744 diff --git a/model-00013-of-00049.safetensors b/model-00013-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbc9a9df7ef17593bd82615dc251ba7d1fd8acb4 --- /dev/null +++ b/model-00013-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb587a4ad954a3c5edc5469953f581de4912fbf98758394b2160dbfe6250228a +size 4932552336 diff --git a/model-00014-of-00049.safetensors b/model-00014-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f097d0c0fe655527f4e960d8240a87a1de84e5b8 --- /dev/null +++ b/model-00014-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e0e2541b7c76ab481c73b7a783b665c7ea9122825a14232dd2e22f92cf8e87 +size 4278215744 diff --git a/model-00015-of-00049.safetensors b/model-00015-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb5c5ddbd37aca37c6486b08e8d43450bdf81aa5 --- /dev/null +++ b/model-00015-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57d62ea42224203b62168ccf1b126b3f2f1d4652626dbb4ad1969b52f97ca02 +size 4278215744 diff --git a/model-00016-of-00049.safetensors b/model-00016-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f0b611ab4aa00fc78b85bfad9d594bf75e15755 --- /dev/null +++ b/model-00016-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488d79f14185e949761a98c7069454d4c6dde09a8e732d1f29ad34f7eee2a5e3 +size 4932552336 diff --git a/model-00017-of-00049.safetensors b/model-00017-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be1a5431983e3615f8d65645fd379845cb88e556 --- /dev/null +++ b/model-00017-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b258a3f8cc45d97fe3ff673468a7c9b53bd60212938fc537997ae8ee3f53a0d8 +size 4278215744 diff --git a/model-00018-of-00049.safetensors b/model-00018-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..639a72bd602324a7aec2950bd416d6fd38723f07 --- /dev/null +++ b/model-00018-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a22952186ce33de46d905d09ee7a8c9873b78e44610c7180075f5c08bbf136 +size 4278215744 diff --git a/model-00019-of-00049.safetensors b/model-00019-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..163be5d56d18a639b762e64d35d4c16e68215a51 --- /dev/null +++ b/model-00019-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e7a2153c1aea192619e65d8b897ba7a667a9073c8261cb99ae0ca893ddf658 +size 4932552328 diff --git a/model-00020-of-00049.safetensors b/model-00020-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d9608f743c4bd4aa4fd8f89b32d6f901f78dc7c --- /dev/null +++ b/model-00020-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7feb5068ed884dcd99fcf4f7dbede994eef865405cebfa054a0bf1f40d3fede1 +size 4278215744 diff --git a/model-00021-of-00049.safetensors b/model-00021-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2aba3fd77b5de82715184f5dd842f22575a52c4a --- /dev/null +++ b/model-00021-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c30888102679f621d4e860601055e14993aea3e78687cb9fa926866df22328 +size 4278215744 diff --git a/model-00022-of-00049.safetensors b/model-00022-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c73f89656af117f9cbc467c4e9d2f1376bccb3c --- /dev/null +++ b/model-00022-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8acd5544e3427db315faf9c28de0a678a343428c75e2209a87333298da03ca43 +size 4932552336 diff --git a/model-00023-of-00049.safetensors b/model-00023-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e09c140178871d5f1bcc22fad70ced041368da1f --- /dev/null +++ b/model-00023-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:612c9a374e15cdf73298b57d9e250d5c994da67aede154e1d2712d2662fb263d +size 4278215744 diff --git a/model-00024-of-00049.safetensors b/model-00024-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c482eefa31a021d4eb0a5c73ca64c3560a603fa --- /dev/null +++ b/model-00024-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802ebd683d8faf619c310869136f608bc838113c71d4f3183d47c77336de519c +size 4278215744 diff --git a/model-00025-of-00049.safetensors b/model-00025-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..158dbcd7aaf4a1ab3351467aae709a78e0e5b19b --- /dev/null +++ b/model-00025-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94aa9211d0161bf503999e2c85e07d69f6d2a84868a9ec2041d0277b3518592 +size 4932552336 diff --git a/model-00026-of-00049.safetensors b/model-00026-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71bb16dd6769f3d97ea97000a0faa6ea54cdf5cf --- /dev/null +++ b/model-00026-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aef782bb909fe1ee030689d7d613655e2af1f2dbd54c3f5f638d153be4ee627 +size 4278215744 diff --git a/model-00027-of-00049.safetensors b/model-00027-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2226c11d0b05e9598301977de51e28d9fd52fe0e --- /dev/null +++ b/model-00027-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f123e8c4a49207bab1260bf64198c2739498e86fba3be11ec5f7aab26833570 +size 4278215744 diff --git a/model-00028-of-00049.safetensors b/model-00028-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef06a840b3a87e31b0cb285b516c8286acb0d3cf --- /dev/null +++ b/model-00028-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b29f20b9426309270dbf5fadb193ba667811e53cdf2dafd69a44de2e3a2802 +size 4932552328 diff --git a/model-00029-of-00049.safetensors b/model-00029-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49d5edeba8e784502344f144fbea2c1392aea6b1 --- /dev/null +++ b/model-00029-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cad09d68490387199facdf027be131a7786b0698d98c202af6e04650137e2ff +size 4278215744 diff --git a/model-00030-of-00049.safetensors b/model-00030-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..417f70b8731746c11157cf066869427054eda4be --- /dev/null +++ b/model-00030-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6546959ce612a06c3610cdef98e1c33d61a6b85b175270d465be72439e1794b5 +size 4278215744 diff --git a/model-00031-of-00049.safetensors b/model-00031-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0122db4e850a312b457e8462bce59308b1d6efb6 --- /dev/null +++ b/model-00031-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89451da7703881dc7517c45657884659f9dfd942deb017ad58b4eea05f86cf61 +size 4932552336 diff --git a/model-00032-of-00049.safetensors b/model-00032-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3a8769bdbc5b233773f2e15c37149105e701382 --- /dev/null +++ b/model-00032-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb788d46a5924136a7f0c7c33171aed0d2fde0f2c14bd9b81fa541bf9118aaa +size 4278215744 diff --git a/model-00033-of-00049.safetensors b/model-00033-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a08b31b0728d67479db8645a900bef0c99d5bbb --- /dev/null +++ b/model-00033-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e123a8f9aec7ee96bae788702294dc47857218bb053d488d14e41a86f103a40c +size 4278215744 diff --git a/model-00034-of-00049.safetensors b/model-00034-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..070540002ba4c0bb0f46a7e36315e6d7071da9d5 --- /dev/null +++ b/model-00034-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e6ddd9508148bf111b469e101564ffe59df878fb025a6d9b801081cc063a65 +size 4932552336 diff --git a/model-00035-of-00049.safetensors b/model-00035-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7895ae457622766ac4bd59b95ef167048725003 --- /dev/null +++ b/model-00035-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbda5842405e076e4d5305e64658d691a6bd1ff48e6dca34e18bfe3aa5da1a12 +size 4278215744 diff --git a/model-00036-of-00049.safetensors b/model-00036-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56851463b92051419e2b2795aab55114ebaa4035 --- /dev/null +++ b/model-00036-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010fc5bbe2b8321b02c60e3b47b186c2268d46cdb1e9ed0d0a2ac663ba12261b +size 4278215736 diff --git a/model-00037-of-00049.safetensors b/model-00037-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a85678e2fe103619a73e7aeb4eaa5824f793f26 --- /dev/null +++ b/model-00037-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9e6c19bef1442eb764c8090efe362ef577a6adaf6bc95c5372967e5b5ec640 +size 4932552336 diff --git a/model-00038-of-00049.safetensors b/model-00038-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d72dbd273e8b3f163bb36058e6751f462a11db5f --- /dev/null +++ b/model-00038-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff529357acb86b8783a41671055a76bf47b8a636e9d6b4f55ebf5362f37769b +size 4278215744 diff --git a/model-00039-of-00049.safetensors b/model-00039-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1672e758c2cdcbca8b7b024e5c64ebf98561b9f2 --- /dev/null +++ b/model-00039-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4df22dccb2695d14a8c1c6ea3085b2d00cff4e74177c812fd852ab959ea6dc4 +size 4278215744 diff --git a/model-00040-of-00049.safetensors b/model-00040-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bee3a729c26d8fd253985dd4096ee0a9d717003 --- /dev/null +++ b/model-00040-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bac6b24b16f69737e08b7f31e73ec9d3d38b303b7a9d8833bc2bf7df8d54b16 +size 4932552336 diff --git a/model-00041-of-00049.safetensors b/model-00041-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..decc4b2be00e33a35dbc39d7dd6b34fb4ab549aa --- /dev/null +++ b/model-00041-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5d175915987cf4badea7cc75a7602412c8f97dbcfda67aac4d78ff62241560 +size 4278215744 diff --git a/model-00042-of-00049.safetensors b/model-00042-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60d8b3fd8c10ff21e4e8f0e62f05cc0a0cc15a70 --- /dev/null +++ b/model-00042-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e54e69338559462378c4f1e614ed29491f9022c4c57a6a467d3ed07c0065b60 +size 4278215744 diff --git a/model-00043-of-00049.safetensors b/model-00043-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da68a8d2d7686e05ceced511bdf525da02d0525e --- /dev/null +++ b/model-00043-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477c1faf63d9681441c92b58fc6e15db2f1e8f0fd16509f79b9499fb77f71cbd +size 4932552328 diff --git a/model-00044-of-00049.safetensors b/model-00044-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1a47e1a8e74d4b9a265e86e67bf4f8e88ee30f8 --- /dev/null +++ b/model-00044-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91ef158bf3c3785d0fecb43e5a2c57db234038d74b8d1e9a5a78cb11f9fcc957 +size 4278215736 diff --git a/model-00045-of-00049.safetensors b/model-00045-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f600738fed16f013a454d82485ea568e4cb12ec --- /dev/null +++ b/model-00045-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b22196438e39f2d6ec75f6cde6f7de7991a2e6fc078d61e691eb0a4c0de28e +size 4278215744 diff --git a/model-00046-of-00049.safetensors b/model-00046-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4aa9b837b84c7c9f1180ecd844c335c39c4a989 --- /dev/null +++ b/model-00046-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9fbbb83123cc0f948c53b238506515a949772d23c03d34b79cb18bbb33a0170 +size 4932552336 diff --git a/model-00047-of-00049.safetensors b/model-00047-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a3b339eff4e8cd781fa9ccbdf4077277efa1b91 --- /dev/null +++ b/model-00047-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3785fce63717ef8121bffd7abd868a428e032fa3f2dc100b1253cdb3c0e71fe0 +size 4278215744 diff --git a/model-00048-of-00049.safetensors b/model-00048-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f7278da26b0156867ed646213e9d12398f80e86 --- /dev/null +++ b/model-00048-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:814b638c6001f10857d26a44eee51c2ee87bd063a9c7be67734ef58665a20231 +size 4278215736 diff --git a/model-00049-of-00049.safetensors b/model-00049-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00907140d1adeeb744d0914af1e45215006c3b3b --- /dev/null +++ b/model-00049-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c9d5a2ede45b15680e244a4f64610c1f8e1bf9f1b0b09a8c5ae15d6c5e4fd40 +size 4932552296 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..1f3133f44eccf58dd7da694b36bbcea258c7bee5 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1 @@ +{"metadata": {"mergekit_version": "0.1.2"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00049.safetensors", "model.layers.0.input_layernorm.weight": "model-00002-of-00049.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00002-of-00049.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00049.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00002-of-00049.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00049.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00049.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00049.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00049.safetensors", "model.layers.1.input_layernorm.weight": "model-00002-of-00049.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00002-of-00049.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00003-of-00049.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00003-of-00049.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00049.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00049.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00049.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00049.safetensors", "model.layers.10.input_layernorm.weight": "model-00003-of-00049.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00003-of-00049.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00049.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00004-of-00049.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.input_layernorm.weight": "model-00004-of-00049.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", "model.layers.12.input_layernorm.weight": "model-00004-of-00049.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00005-of-00049.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00049.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00005-of-00049.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00049.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00049.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00049.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00049.safetensors", "model.layers.13.input_layernorm.weight": "model-00005-of-00049.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00005-of-00049.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00049.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00006-of-00049.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00049.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00049.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00049.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00049.safetensors", "model.layers.14.input_layernorm.weight": "model-00006-of-00049.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00006-of-00049.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00049.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00007-of-00049.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.input_layernorm.weight": "model-00007-of-00049.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", "model.layers.16.input_layernorm.weight": "model-00007-of-00049.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00008-of-00049.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00008-of-00049.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00008-of-00049.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00008-of-00049.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00008-of-00049.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00008-of-00049.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00008-of-00049.safetensors", "model.layers.17.input_layernorm.weight": "model-00008-of-00049.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00008-of-00049.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00009-of-00049.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00009-of-00049.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00009-of-00049.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00009-of-00049.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00009-of-00049.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00009-of-00049.safetensors", "model.layers.18.input_layernorm.weight": "model-00009-of-00049.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00009-of-00049.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00009-of-00049.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00010-of-00049.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.input_layernorm.weight": "model-00010-of-00049.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.mlp.gate_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", "model.layers.2.input_layernorm.weight": "model-00010-of-00049.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00011-of-00049.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00011-of-00049.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00011-of-00049.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00011-of-00049.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00011-of-00049.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00011-of-00049.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00011-of-00049.safetensors", "model.layers.20.input_layernorm.weight": "model-00011-of-00049.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00011-of-00049.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00012-of-00049.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00012-of-00049.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00012-of-00049.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00012-of-00049.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00012-of-00049.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00012-of-00049.safetensors", "model.layers.21.input_layernorm.weight": "model-00012-of-00049.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00012-of-00049.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00012-of-00049.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00013-of-00049.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.input_layernorm.weight": "model-00013-of-00049.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", "model.layers.23.input_layernorm.weight": "model-00013-of-00049.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00014-of-00049.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00014-of-00049.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00014-of-00049.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00014-of-00049.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00014-of-00049.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00014-of-00049.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00014-of-00049.safetensors", "model.layers.24.input_layernorm.weight": "model-00014-of-00049.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00014-of-00049.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00015-of-00049.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00015-of-00049.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00015-of-00049.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00015-of-00049.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00015-of-00049.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00015-of-00049.safetensors", "model.layers.25.input_layernorm.weight": "model-00015-of-00049.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00015-of-00049.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00015-of-00049.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00016-of-00049.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.input_layernorm.weight": "model-00016-of-00049.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.mlp.up_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", "model.layers.27.input_layernorm.weight": "model-00016-of-00049.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00017-of-00049.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00017-of-00049.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00017-of-00049.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00017-of-00049.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00017-of-00049.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00017-of-00049.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00017-of-00049.safetensors", "model.layers.28.input_layernorm.weight": "model-00017-of-00049.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00017-of-00049.safetensors", "model.layers.28.mlp.gate_proj.weight": "model-00018-of-00049.safetensors", "model.layers.28.mlp.up_proj.weight": "model-00018-of-00049.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00018-of-00049.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00018-of-00049.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00018-of-00049.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00018-of-00049.safetensors", "model.layers.29.input_layernorm.weight": "model-00018-of-00049.safetensors", "model.layers.29.mlp.down_proj.weight": "model-00018-of-00049.safetensors", "model.layers.29.mlp.gate_proj.weight": "model-00018-of-00049.safetensors", "model.layers.29.mlp.up_proj.weight": "model-00019-of-00049.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.input_layernorm.weight": "model-00019-of-00049.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", "model.layers.30.input_layernorm.weight": "model-00019-of-00049.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00020-of-00049.safetensors", "model.layers.30.mlp.gate_proj.weight": "model-00020-of-00049.safetensors", "model.layers.30.mlp.up_proj.weight": "model-00020-of-00049.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00020-of-00049.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00020-of-00049.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00020-of-00049.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00020-of-00049.safetensors", "model.layers.31.input_layernorm.weight": "model-00020-of-00049.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00020-of-00049.safetensors", "model.layers.31.mlp.gate_proj.weight": "model-00021-of-00049.safetensors", "model.layers.31.mlp.up_proj.weight": "model-00021-of-00049.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00021-of-00049.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00021-of-00049.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00021-of-00049.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00021-of-00049.safetensors", "model.layers.32.input_layernorm.weight": "model-00021-of-00049.safetensors", "model.layers.32.mlp.down_proj.weight": "model-00021-of-00049.safetensors", "model.layers.32.mlp.gate_proj.weight": "model-00021-of-00049.safetensors", "model.layers.32.mlp.up_proj.weight": "model-00022-of-00049.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.input_layernorm.weight": "model-00022-of-00049.safetensors", "model.layers.33.mlp.down_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.mlp.gate_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.mlp.up_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", "model.layers.34.input_layernorm.weight": "model-00022-of-00049.safetensors", "model.layers.34.mlp.down_proj.weight": "model-00023-of-00049.safetensors", "model.layers.34.mlp.gate_proj.weight": "model-00023-of-00049.safetensors", "model.layers.34.mlp.up_proj.weight": "model-00023-of-00049.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00023-of-00049.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00023-of-00049.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00023-of-00049.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00023-of-00049.safetensors", "model.layers.35.input_layernorm.weight": "model-00023-of-00049.safetensors", "model.layers.35.mlp.down_proj.weight": "model-00023-of-00049.safetensors", "model.layers.35.mlp.gate_proj.weight": "model-00024-of-00049.safetensors", "model.layers.35.mlp.up_proj.weight": "model-00024-of-00049.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00024-of-00049.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00024-of-00049.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00024-of-00049.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00024-of-00049.safetensors", "model.layers.36.input_layernorm.weight": "model-00024-of-00049.safetensors", "model.layers.36.mlp.down_proj.weight": "model-00024-of-00049.safetensors", "model.layers.36.mlp.gate_proj.weight": "model-00024-of-00049.safetensors", "model.layers.36.mlp.up_proj.weight": "model-00025-of-00049.safetensors", "model.layers.36.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.input_layernorm.weight": "model-00025-of-00049.safetensors", "model.layers.37.mlp.down_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.mlp.gate_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.mlp.up_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", "model.layers.38.input_layernorm.weight": "model-00025-of-00049.safetensors", "model.layers.38.mlp.down_proj.weight": "model-00026-of-00049.safetensors", "model.layers.38.mlp.gate_proj.weight": "model-00026-of-00049.safetensors", "model.layers.38.mlp.up_proj.weight": "model-00026-of-00049.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00026-of-00049.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00026-of-00049.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00026-of-00049.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00026-of-00049.safetensors", "model.layers.39.input_layernorm.weight": "model-00026-of-00049.safetensors", "model.layers.39.mlp.down_proj.weight": "model-00026-of-00049.safetensors", "model.layers.39.mlp.gate_proj.weight": "model-00027-of-00049.safetensors", "model.layers.39.mlp.up_proj.weight": "model-00027-of-00049.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00027-of-00049.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00027-of-00049.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00027-of-00049.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00027-of-00049.safetensors", "model.layers.4.input_layernorm.weight": "model-00027-of-00049.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00027-of-00049.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00027-of-00049.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00028-of-00049.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.input_layernorm.weight": "model-00028-of-00049.safetensors", "model.layers.40.mlp.down_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.mlp.gate_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.mlp.up_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", "model.layers.41.input_layernorm.weight": "model-00028-of-00049.safetensors", "model.layers.41.mlp.down_proj.weight": "model-00029-of-00049.safetensors", "model.layers.41.mlp.gate_proj.weight": "model-00029-of-00049.safetensors", "model.layers.41.mlp.up_proj.weight": "model-00029-of-00049.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00029-of-00049.safetensors", "model.layers.41.self_attn.o_proj.weight": "model-00029-of-00049.safetensors", "model.layers.41.self_attn.q_proj.weight": "model-00029-of-00049.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00029-of-00049.safetensors", "model.layers.42.input_layernorm.weight": "model-00029-of-00049.safetensors", "model.layers.42.mlp.down_proj.weight": "model-00029-of-00049.safetensors", "model.layers.42.mlp.gate_proj.weight": "model-00030-of-00049.safetensors", "model.layers.42.mlp.up_proj.weight": "model-00030-of-00049.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00030-of-00049.safetensors", "model.layers.42.self_attn.o_proj.weight": "model-00030-of-00049.safetensors", "model.layers.42.self_attn.q_proj.weight": "model-00030-of-00049.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00030-of-00049.safetensors", "model.layers.43.input_layernorm.weight": "model-00030-of-00049.safetensors", "model.layers.43.mlp.down_proj.weight": "model-00030-of-00049.safetensors", "model.layers.43.mlp.gate_proj.weight": "model-00030-of-00049.safetensors", "model.layers.43.mlp.up_proj.weight": "model-00031-of-00049.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", "model.layers.43.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", "model.layers.43.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.input_layernorm.weight": "model-00031-of-00049.safetensors", "model.layers.44.mlp.down_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.mlp.gate_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.mlp.up_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", "model.layers.45.input_layernorm.weight": "model-00031-of-00049.safetensors", "model.layers.45.mlp.down_proj.weight": "model-00032-of-00049.safetensors", "model.layers.45.mlp.gate_proj.weight": "model-00032-of-00049.safetensors", "model.layers.45.mlp.up_proj.weight": "model-00032-of-00049.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00032-of-00049.safetensors", "model.layers.45.self_attn.o_proj.weight": "model-00032-of-00049.safetensors", "model.layers.45.self_attn.q_proj.weight": "model-00032-of-00049.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00032-of-00049.safetensors", "model.layers.46.input_layernorm.weight": "model-00032-of-00049.safetensors", "model.layers.46.mlp.down_proj.weight": "model-00032-of-00049.safetensors", "model.layers.46.mlp.gate_proj.weight": "model-00033-of-00049.safetensors", "model.layers.46.mlp.up_proj.weight": "model-00033-of-00049.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00033-of-00049.safetensors", "model.layers.46.self_attn.o_proj.weight": "model-00033-of-00049.safetensors", "model.layers.46.self_attn.q_proj.weight": "model-00033-of-00049.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00033-of-00049.safetensors", "model.layers.47.input_layernorm.weight": "model-00033-of-00049.safetensors", "model.layers.47.mlp.down_proj.weight": "model-00033-of-00049.safetensors", "model.layers.47.mlp.gate_proj.weight": "model-00033-of-00049.safetensors", "model.layers.47.mlp.up_proj.weight": "model-00034-of-00049.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", "model.layers.47.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", "model.layers.47.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.input_layernorm.weight": "model-00034-of-00049.safetensors", "model.layers.48.mlp.down_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.mlp.gate_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.mlp.up_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", "model.layers.48.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", "model.layers.49.input_layernorm.weight": "model-00034-of-00049.safetensors", "model.layers.49.mlp.down_proj.weight": "model-00035-of-00049.safetensors", "model.layers.49.mlp.gate_proj.weight": "model-00035-of-00049.safetensors", "model.layers.49.mlp.up_proj.weight": "model-00035-of-00049.safetensors", "model.layers.49.self_attn.k_proj.weight": "model-00035-of-00049.safetensors", "model.layers.49.self_attn.o_proj.weight": "model-00035-of-00049.safetensors", "model.layers.49.self_attn.q_proj.weight": "model-00035-of-00049.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00035-of-00049.safetensors", "model.layers.5.input_layernorm.weight": "model-00035-of-00049.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00035-of-00049.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00036-of-00049.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00036-of-00049.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00036-of-00049.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00036-of-00049.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00036-of-00049.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00036-of-00049.safetensors", "model.layers.50.input_layernorm.weight": "model-00036-of-00049.safetensors", "model.layers.50.mlp.down_proj.weight": "model-00036-of-00049.safetensors", "model.layers.50.mlp.gate_proj.weight": "model-00036-of-00049.safetensors", "model.layers.50.mlp.up_proj.weight": "model-00037-of-00049.safetensors", "model.layers.50.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", "model.layers.50.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", "model.layers.50.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.input_layernorm.weight": "model-00037-of-00049.safetensors", "model.layers.51.mlp.down_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.mlp.gate_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.mlp.up_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", "model.layers.52.input_layernorm.weight": "model-00037-of-00049.safetensors", "model.layers.52.mlp.down_proj.weight": "model-00038-of-00049.safetensors", "model.layers.52.mlp.gate_proj.weight": "model-00038-of-00049.safetensors", "model.layers.52.mlp.up_proj.weight": "model-00038-of-00049.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00038-of-00049.safetensors", "model.layers.52.self_attn.o_proj.weight": "model-00038-of-00049.safetensors", "model.layers.52.self_attn.q_proj.weight": "model-00038-of-00049.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00038-of-00049.safetensors", "model.layers.53.input_layernorm.weight": "model-00038-of-00049.safetensors", "model.layers.53.mlp.down_proj.weight": "model-00038-of-00049.safetensors", "model.layers.53.mlp.gate_proj.weight": "model-00039-of-00049.safetensors", "model.layers.53.mlp.up_proj.weight": "model-00039-of-00049.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00039-of-00049.safetensors", "model.layers.53.self_attn.o_proj.weight": "model-00039-of-00049.safetensors", "model.layers.53.self_attn.q_proj.weight": "model-00039-of-00049.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00039-of-00049.safetensors", "model.layers.54.input_layernorm.weight": "model-00039-of-00049.safetensors", "model.layers.54.mlp.down_proj.weight": "model-00039-of-00049.safetensors", "model.layers.54.mlp.gate_proj.weight": "model-00039-of-00049.safetensors", "model.layers.54.mlp.up_proj.weight": "model-00040-of-00049.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", "model.layers.54.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", "model.layers.54.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.input_layernorm.weight": "model-00040-of-00049.safetensors", "model.layers.55.mlp.down_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.mlp.gate_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.mlp.up_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", "model.layers.56.input_layernorm.weight": "model-00040-of-00049.safetensors", "model.layers.56.mlp.down_proj.weight": "model-00041-of-00049.safetensors", "model.layers.56.mlp.gate_proj.weight": "model-00041-of-00049.safetensors", "model.layers.56.mlp.up_proj.weight": "model-00041-of-00049.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00041-of-00049.safetensors", "model.layers.56.self_attn.o_proj.weight": "model-00041-of-00049.safetensors", "model.layers.56.self_attn.q_proj.weight": "model-00041-of-00049.safetensors", "model.layers.56.self_attn.v_proj.weight": "model-00041-of-00049.safetensors", "model.layers.57.input_layernorm.weight": "model-00041-of-00049.safetensors", "model.layers.57.mlp.down_proj.weight": "model-00041-of-00049.safetensors", "model.layers.57.mlp.gate_proj.weight": "model-00042-of-00049.safetensors", "model.layers.57.mlp.up_proj.weight": "model-00042-of-00049.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00042-of-00049.safetensors", "model.layers.57.self_attn.o_proj.weight": "model-00042-of-00049.safetensors", "model.layers.57.self_attn.q_proj.weight": "model-00042-of-00049.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00042-of-00049.safetensors", "model.layers.58.input_layernorm.weight": "model-00042-of-00049.safetensors", "model.layers.58.mlp.down_proj.weight": "model-00042-of-00049.safetensors", "model.layers.58.mlp.gate_proj.weight": "model-00042-of-00049.safetensors", "model.layers.58.mlp.up_proj.weight": "model-00043-of-00049.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", "model.layers.58.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", "model.layers.58.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", "model.layers.58.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.input_layernorm.weight": "model-00043-of-00049.safetensors", "model.layers.59.mlp.down_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.mlp.gate_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.mlp.up_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", "model.layers.6.input_layernorm.weight": "model-00043-of-00049.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00044-of-00049.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00044-of-00049.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00044-of-00049.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00044-of-00049.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00044-of-00049.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00044-of-00049.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00044-of-00049.safetensors", "model.layers.60.input_layernorm.weight": "model-00044-of-00049.safetensors", "model.layers.60.mlp.down_proj.weight": "model-00044-of-00049.safetensors", "model.layers.60.mlp.gate_proj.weight": "model-00045-of-00049.safetensors", "model.layers.60.mlp.up_proj.weight": "model-00045-of-00049.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00045-of-00049.safetensors", "model.layers.60.self_attn.o_proj.weight": "model-00045-of-00049.safetensors", "model.layers.60.self_attn.q_proj.weight": "model-00045-of-00049.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00045-of-00049.safetensors", "model.layers.61.input_layernorm.weight": "model-00045-of-00049.safetensors", "model.layers.61.mlp.down_proj.weight": "model-00045-of-00049.safetensors", "model.layers.61.mlp.gate_proj.weight": "model-00045-of-00049.safetensors", "model.layers.61.mlp.up_proj.weight": "model-00046-of-00049.safetensors", "model.layers.61.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", "model.layers.61.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", "model.layers.61.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", "model.layers.61.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.input_layernorm.weight": "model-00046-of-00049.safetensors", "model.layers.62.mlp.down_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.mlp.gate_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.mlp.up_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", "model.layers.62.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", "model.layers.63.input_layernorm.weight": "model-00046-of-00049.safetensors", "model.layers.63.mlp.down_proj.weight": "model-00047-of-00049.safetensors", "model.layers.63.mlp.gate_proj.weight": "model-00047-of-00049.safetensors", "model.layers.63.mlp.up_proj.weight": "model-00047-of-00049.safetensors", "model.layers.63.self_attn.k_proj.weight": "model-00047-of-00049.safetensors", "model.layers.63.self_attn.o_proj.weight": "model-00047-of-00049.safetensors", "model.layers.63.self_attn.q_proj.weight": "model-00047-of-00049.safetensors", "model.layers.63.self_attn.v_proj.weight": "model-00047-of-00049.safetensors", "model.layers.7.input_layernorm.weight": "model-00047-of-00049.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00047-of-00049.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00048-of-00049.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00048-of-00049.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00048-of-00049.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00048-of-00049.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00048-of-00049.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00048-of-00049.safetensors", "model.layers.8.input_layernorm.weight": "model-00048-of-00049.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00048-of-00049.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00048-of-00049.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00049-of-00049.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00049-of-00049.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00049-of-00049.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00049-of-00049.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.input_layernorm.weight": "model-00049-of-00049.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00049-of-00049.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00049-of-00049.safetensors", "model.norm.weight": "model-00049-of-00049.safetensors"}} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a8eb9ef63af01dfa2c350573b543b14275370944 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e13c779896a87c94c9e0df9482975c8b8485265 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953b2730d23ca19e7dca96f75f3e10b497bb679290b06d8981190bff2039fc72 +size 20124922 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..58a110fc1da0cacbac440922392f8b73d9b0a30e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,351 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255000": { + "content": "<|START_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255001": { + "content": "<|END_OF_TURN_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255002": { + "content": "<|YES_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255003": { + "content": "<|NO_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255004": { + "content": "<|GOOD_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255005": { + "content": "<|BAD_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255006": { + "content": "<|USER_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255007": { + "content": "<|CHATBOT_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255008": { + "content": "<|SYSTEM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255009": { + "content": "<|USER_0_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255010": { + "content": "<|USER_1_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255011": { + "content": "<|USER_2_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255012": { + "content": "<|USER_3_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255013": { + "content": "<|USER_4_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255014": { + "content": "<|USER_5_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255015": { + "content": "<|USER_6_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255016": { + "content": "<|USER_7_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255017": { + "content": "<|USER_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255018": { + "content": "<|USER_9_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255019": { + "content": "<|START_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255020": { + "content": "<|END_THINKING|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255021": { + "content": "<|START_RESPONSE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255022": { + "content": "<|END_RESPONSE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255023": { + "content": "<|START_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255024": { + "content": "<|END_ACTION|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255025": { + "content": "<|START_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255026": { + "content": "<|END_TOOL_RESULT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255027": { + "content": "<|EXTRA_8_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255028": { + "content": "<|NEW_FILE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255029": { + "content": "<|BEGINNING_OF_PREFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255030": { + "content": "<|BEGINNING_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255031": { + "content": "<|BEGINNING_OF_SUFFIX_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255032": { + "content": "<|END_OF_MIDDLE_FIM_TOKEN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] + '\n\n' }}{% set loop_messages = messages[1:] %}{% else %}{{ 'Below are some instructions that describe some tasks. Write responses that appropriately complete each request.' + '\n\n' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\n' + message['content'] + eos_token + '\n\n' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response:\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|END_OF_TURN_TOKEN|>", + "extra_special_tokens": {}, + "legacy": true, + "merges_file": null, + "model_max_length": 262144, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "CohereTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "vocab_file": null +}