diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..446e54d45deda1a6b7912a58660402b9c7f686f2 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +--- +language: +- en +- zh +library_name: mlx +license: mit +pipeline_tag: text-generation +base_model: zai-org/GLM-4.6 +tags: +- mlx +--- + +# mlx-community/GLM-4.6-bf16 + +This model [mlx-community/GLM-4.6-bf16](https://huggingface.co/mlx-community/GLM-4.6-bf16) was +converted to MLX format from [zai-org/GLM-4.6](https://huggingface.co/zai-org/GLM-4.6) +using mlx-lm version **0.28.2**. + +## Use with mlx + +```bash +pip install mlx-lm +``` + +```python +from mlx_lm import load, generate + +model, tokenizer = load("mlx-community/GLM-4.6-bf16") + +prompt = "hello" + +if tokenizer.chat_template is not None: + messages = [{"role": "user", "content": prompt}] + prompt = tokenizer.apply_chat_template( + messages, add_generation_prompt=True + ) + +response = generate(model, tokenizer, prompt=prompt, verbose=True) +``` diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..41478957aca7a04b7321022e7d1f73de5badd995 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,103 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|> +{{ visible_text(m.content) }} +{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if loop.index0 > ns.last_user_index and reasoning_content -%} +{{ '\n' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '\n' }} +{%- endif -%} +{%- if content.strip() -%} +{{ '\n' + content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{ '\n' + tc.name }} +{% set _args = tc.arguments %} +{% for k, v in _args.items() %} +{{ k }} +{{ v | tojson(ensure_ascii=False) if v is not string else v }} +{% endfor %} +{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '\n\n' }} +{{- m.content }} +{{- '\n' }} +{%- else -%} +<|observation|>{% for tr in m.content %} + + +{{ tr.output if tr.output is defined else tr }} +{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|> +{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '\n' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e67cfea5dbb2e1f9ba6a1455206a9d5c22805f2b --- /dev/null +++ b/config.json @@ -0,0 +1,43 @@ +{ + "architectures": [ + "Glm4MoeForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "first_k_dense_replace": 3, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 12288, + "max_position_embeddings": 202752, + "model_type": "glm4_moe", + "moe_intermediate_size": 1536, + "n_group": 1, + "n_routed_experts": 160, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 96, + "num_experts_per_tok": 8, + "num_hidden_layers": 92, + "num_key_value_heads": 8, + "num_nextn_predict_layers": 1, + "pad_token_id": 151329, + "partial_rotary_factor": 0.5, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000, + "routed_scaling_factor": 2.5, + "tie_word_embeddings": false, + "topk_group": 1, + "torch_dtype": "bfloat16", + "transformers_version": "4.54.0", + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 151552 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f51194759eb31dde6fbc75a28e3fb7036f68161a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "pad_token_id": 151329, + "temperature": 1.0, + "transformers_version": "4.56.2" +} diff --git a/model-00003-of-00135.safetensors b/model-00003-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..494498a8000bacadfd32a57949a6b87e637fbb97 --- /dev/null +++ b/model-00003-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a039fed5fc014a5ffe10922f80d5a9f21ab52a2af7c79986aca058cf49fcfcd8 +size 5354671340 diff --git a/model-00006-of-00135.safetensors b/model-00006-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc30c4cf7edef11daed09820f777499f3b73f29c --- /dev/null +++ b/model-00006-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318b54f933b998aeb5119934c2eb562047c99f32f242a7b67a359f3db4b2eb83 +size 5354671340 diff --git a/model-00009-of-00135.safetensors b/model-00009-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..797752d7a9c9ab930fa713221361c280f2a88762 --- /dev/null +++ b/model-00009-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d62ebd404a3878e4b50db9750f576d165f7c22812b96ed9da8980c63b87abe +size 5354671340 diff --git a/model-00011-of-00135.safetensors b/model-00011-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4700035311ff6b91205e4579f81d5349799a156 --- /dev/null +++ b/model-00011-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b9efc2c6dd045cab6a88b354a81fd4b37214ad5a7d9714f1bbee612f29506b +size 5033165088 diff --git a/model-00014-of-00135.safetensors b/model-00014-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..186a569acfa2b3f01e9c7b62e4e889de00be28b2 --- /dev/null +++ b/model-00014-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:022cedd1fc0fd6d39244439c89291db779b72579cdfc3e2d7ec93beed1eb4294 +size 5033165090 diff --git a/model-00022-of-00135.safetensors b/model-00022-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26182980e2c30bb3da9d3ad90b3da46fdf83d751 --- /dev/null +++ b/model-00022-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22301a0424e9ac23829b4f0af7eaa370bcf73c1ddd3e410270c509175b9cb8c5 +size 5354671300 diff --git a/model-00023-of-00135.safetensors b/model-00023-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cf923fa35f51ca152c7e637a24c1b859f16ad06 --- /dev/null +++ b/model-00023-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5324f61050007c9330fbb06821af7877a687c5f842dfdd7fea25da92ab921f15 +size 5033165090 diff --git a/model-00026-of-00135.safetensors b/model-00026-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06eef7225f085997e34b12df9b577900a96cbb95 --- /dev/null +++ b/model-00026-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d71580e389970e1a67695f89ca3e4eb38db59240bec384ce5194e117d746780 +size 5033165090 diff --git a/model-00028-of-00135.safetensors b/model-00028-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f75d4f583695aadb64d77ff956455b2b14c28ce5 --- /dev/null +++ b/model-00028-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3815acebb6ac6ec4f5d4994f2cff8698515e8a51c6bce3225784f0036b58eb2c +size 5354671302 diff --git a/model-00029-of-00135.safetensors b/model-00029-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e21996ff1c0612853c9b9f28de7c5cd60567a7db --- /dev/null +++ b/model-00029-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41537d8d0a45a6ae1d79922922431ebb64f3d9883e18cea067463944b3477e3a +size 5033165090 diff --git a/model-00031-of-00135.safetensors b/model-00031-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71f2dac8b8b7240ba5bf45805540df3b85d8ff11 --- /dev/null +++ b/model-00031-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882b39c46deb3e1951d4f6d602a1f439b317c6529a1a70b8e0976977abf2a74c +size 5354671322 diff --git a/model-00034-of-00135.safetensors b/model-00034-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c880d40e444fcf3d7892ed0d1a112b1c5e426c36 --- /dev/null +++ b/model-00034-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327546e2b3901f4aa07b97dee94f362ac4415a09b126f80cf0bce747a6adf0e9 +size 5354671310 diff --git a/model-00042-of-00135.safetensors b/model-00042-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdf437a045f78130e7cbb4b08b9c0003e81bfd34 --- /dev/null +++ b/model-00042-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23408c9831918cc124151fb96749d9ed3db95930235e9470ba83e6646f44b7a9 +size 5354671358 diff --git a/model-00047-of-00135.safetensors b/model-00047-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..419faa9e29f823afc54bf21ad1f3c7b0b7200fab --- /dev/null +++ b/model-00047-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b2c54f1338b943766495b5ce2ac8b26b4b9a15a5e19b2fbcdcb5290a3b97069 +size 5033165090 diff --git a/model-00048-of-00135.safetensors b/model-00048-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04e5c039403c8ebd6cbe15e428c279f2f4637164 --- /dev/null +++ b/model-00048-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c9ee1115700fd61534c1ac9c363ba76a863d65fe07164f53718fba393b90e5 +size 5354671302 diff --git a/model-00050-of-00135.safetensors b/model-00050-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66f183bc8317689f72a5e17909ff1b11581af54c --- /dev/null +++ b/model-00050-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae0482af9fc9799284f60dad916bfb8394d307ffce6a1286dc1791b0677d541 +size 5033165090 diff --git a/model-00055-of-00135.safetensors b/model-00055-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6865c78877710301762089d0a5f9128be45a840c --- /dev/null +++ b/model-00055-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3031ee37d1fcbe393cc236ea0997e4079bad6041a83e781987f2010abac5062 +size 5354671322 diff --git a/model-00062-of-00135.safetensors b/model-00062-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b223e61dfaabe1014172494c3db08d2153d13c2 --- /dev/null +++ b/model-00062-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbb024b3c345a7aab4cb7f1c5635e6e4739b51c18a1a1e27d4eb85fcca89e64 +size 5033165090 diff --git a/model-00063-of-00135.safetensors b/model-00063-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86bf2ff370b34ac4a5aa8287fc4f8fb364e9f46b --- /dev/null +++ b/model-00063-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17908bfb29017abf78378ce68464b617993ff775fd95959a378187dfed837b54 +size 5354671358 diff --git a/model-00066-of-00135.safetensors b/model-00066-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ed075c7ba85368bd298fcd071e97d806b6e4f25 --- /dev/null +++ b/model-00066-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:718163cc3c9e2c614640bb863daad1620818d849a3f0450301188b26935749e8 +size 5354671308 diff --git a/model-00067-of-00135.safetensors b/model-00067-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31ac3bdbbd329018e1ce356613016d79db3f2a0a --- /dev/null +++ b/model-00067-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43fcebae0b9c4daeae60f158967946f5e751457a91fa4c9f9f296bfe403d0997 +size 5354671298 diff --git a/model-00068-of-00135.safetensors b/model-00068-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3d732999a39049e82954d558acd3c8ad3a85f66 --- /dev/null +++ b/model-00068-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbc7d74575909ee94c0f34ce40f68d98ba0e3e9d650d5930f1282115df2de90 +size 5033165090 diff --git a/model-00070-of-00135.safetensors b/model-00070-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8814868e2ad2e688091a73565c137685152375a7 --- /dev/null +++ b/model-00070-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3193419780cb1e9dba54d8363d89b86ea0fa67f348dc62f709a410a8be0a7d +size 5354671344 diff --git a/model-00075-of-00135.safetensors b/model-00075-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea168eede3bcbec50b58f70e18ea03250ab40cf5 --- /dev/null +++ b/model-00075-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3382ea86ba21ddf555091c5aa53eecaf16c579aaec34389317f33e6e431b5964 +size 5354671358 diff --git a/model-00083-of-00135.safetensors b/model-00083-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4354e70e86912efb90210b3f0d206a2f6125ed0 --- /dev/null +++ b/model-00083-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e04fd2f48023a0f2577f79c1f7b66a150dbba8a99f9871aad4c538b41f481aa +size 5033165090 diff --git a/model-00086-of-00135.safetensors b/model-00086-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08d4b794b35792415e01b5cb6dd3cad6ee0ae392 --- /dev/null +++ b/model-00086-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b85093e5734d734f88f9b63e34348939dada0afab6fd16241ed93a7cccf8b86 +size 5033165090 diff --git a/model-00089-of-00135.safetensors b/model-00089-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..003831327929c4e01b5a84feefe471e00185acd8 --- /dev/null +++ b/model-00089-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135eef13c42126dee9cab8d3c69174c87a681984987db0a0adf77b023cb63d27 +size 5033165090 diff --git a/model-00090-of-00135.safetensors b/model-00090-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a901579f04817d51e74765fd6f5ed053c8ba8bb0 --- /dev/null +++ b/model-00090-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393c6508cb9c7ba3479539f9eac22a6c8158a75ed05d9a6157d093c5a182ed21 +size 5354671358 diff --git a/model-00091-of-00135.safetensors b/model-00091-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a163dee228678e6cd07dcdc55a8f29c4c60a4f16 --- /dev/null +++ b/model-00091-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb1475a9af916e5f0323acdf7f5de760eb822adc8055245beb1697c0f0eda35f +size 5354671330 diff --git a/model-00094-of-00135.safetensors b/model-00094-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..584de270656ce99baad68791e4f0d9ee612043bb --- /dev/null +++ b/model-00094-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66928625c7fb846d6e4fef39639fff0e1464bbdf5be3df9a23de00066d75c402 +size 5354671312 diff --git a/model-00095-of-00135.safetensors b/model-00095-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..336f6b2ad88cc3073cf17445e319a6dee415deca --- /dev/null +++ b/model-00095-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37574ab00812067339d0646061c4338e8ac53afce41db1b64711643166e9185 +size 5033165090 diff --git a/model-00102-of-00135.safetensors b/model-00102-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4db1a6e3d82ed0a8e79f9a2c94b04fa8d604876 --- /dev/null +++ b/model-00102-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69073be1e17bc9d09f473749dd6bec62c5cc6ee51099a97379dbdc230306b569 +size 5354671348 diff --git a/model-00103-of-00135.safetensors b/model-00103-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a4491413ca84df3e9f376f504aa626d76a1fdb7 --- /dev/null +++ b/model-00103-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0f50e2fff38a141741b59b3bf99c3fed71d1631f579d378c84b1f8e149aa79 +size 5354671308 diff --git a/model-00107-of-00135.safetensors b/model-00107-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28a805bce9754192df57761aa811f96695908a1c --- /dev/null +++ b/model-00107-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a4c1d712945e9c2fb1194a6e24dcdbb02b28858758a76c655e933eb9e3fe7c +size 5033165090 diff --git a/model-00108-of-00135.safetensors b/model-00108-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f810cd0d73a21c66f8567362be7a8b2d5d80e1d7 --- /dev/null +++ b/model-00108-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe63cafa7b1de08d4322c5ec5a408263d8f1e306e70ed64aab02f476d5d1868 +size 5354671326 diff --git a/model-00110-of-00135.safetensors b/model-00110-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a946ae2db66d8709bdd45e019f85714cc4d92db0 --- /dev/null +++ b/model-00110-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5ab6cb42b0ff1b15502d93d6586aa45e75da9ee66a42cc873c5fb029f717fe +size 5033165090 diff --git a/model-00115-of-00135.safetensors b/model-00115-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c8e62d5b93215b04b8d0296f03163f57ad50e50 --- /dev/null +++ b/model-00115-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a7374ce7ec2336d6cdbeaa03f38067beb4101e6ebcf18094e73ce2d3d80e9a +size 5354671316 diff --git a/model-00122-of-00135.safetensors b/model-00122-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f81137ddd5a7cbb0e675592ce4ce4f8b1c00be09 --- /dev/null +++ b/model-00122-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd206177e2a3f318a246edc5d28e3ce5c6a624896ebc4286c5414d29f3ae3b3c +size 5033165090 diff --git a/model-00127-of-00135.safetensors b/model-00127-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8517b296698e3236e8667a8cdc74dcf9f8417979 --- /dev/null +++ b/model-00127-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0491ed1593137b9aebf920636c3be8ae6b96b71bccb6c785d69db2cbd1838455 +size 5354671324 diff --git a/model-00128-of-00135.safetensors b/model-00128-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e5dc714ce3277479fd4b65df627e41e619e8219 --- /dev/null +++ b/model-00128-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627df892d2cbd3e5c9876214f6ca1e65af1642291ffe69961ba8a2a7adbda675 +size 5033165090 diff --git a/model-00130-of-00135.safetensors b/model-00130-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad730b438ae2e5c941bbfc7b7856d90da4bc0795 --- /dev/null +++ b/model-00130-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91c1d0468380e6f3c15fbd677f6ba2ac54fdca62f0eb57c9b9c0e3fa8d4c758 +size 5354671296 diff --git a/model-00135-of-00135.safetensors b/model-00135-of-00135.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1b5bb85503c363c2ce8149da5204884f5a0542a --- /dev/null +++ b/model-00135-of-00135.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124063028da07d0c9d60015fb6c32e39ef79b2219f3e74f4d3cb9ca41e52be6d +size 4117331738 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..debab1e759c9cd6cbc086687acabb3d31068e515 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1744 @@ +{ + "metadata": { + "total_size": 705595686528, + "total_parameters": 352797829024 + }, + "weight_map": { + "lm_head.weight": "model-00135-of-00135.safetensors", + "model.embed_tokens.weight": "model-00001-of-00135.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00135.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00135.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.10.input_layernorm.weight": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.gate.e_score_correction_bias": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.gate.weight": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.switch_mlp.down_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.10.mlp.switch_mlp.gate_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.10.mlp.switch_mlp.up_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00013-of-00135.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00012-of-00135.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.11.input_layernorm.weight": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.gate.e_score_correction_bias": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.gate.weight": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.switch_mlp.down_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.11.mlp.switch_mlp.gate_proj.weight": "model-00014-of-00135.safetensors", + "model.layers.11.mlp.switch_mlp.up_proj.weight": "model-00014-of-00135.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00015-of-00135.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00013-of-00135.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00013-of-00135.safetensors", + "model.layers.12.input_layernorm.weight": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.gate.e_score_correction_bias": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.gate.weight": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.switch_mlp.down_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.12.mlp.switch_mlp.gate_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.12.mlp.switch_mlp.up_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00016-of-00135.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00015-of-00135.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00015-of-00135.safetensors", + "model.layers.13.input_layernorm.weight": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.gate.e_score_correction_bias": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.gate.weight": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.switch_mlp.down_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.13.mlp.switch_mlp.gate_proj.weight": "model-00017-of-00135.safetensors", + "model.layers.13.mlp.switch_mlp.up_proj.weight": "model-00017-of-00135.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00018-of-00135.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00016-of-00135.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00016-of-00135.safetensors", + "model.layers.14.input_layernorm.weight": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.gate.e_score_correction_bias": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.gate.weight": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.switch_mlp.down_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.14.mlp.switch_mlp.gate_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.14.mlp.switch_mlp.up_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00019-of-00135.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00018-of-00135.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00018-of-00135.safetensors", + "model.layers.15.input_layernorm.weight": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.gate.e_score_correction_bias": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.gate.weight": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.switch_mlp.down_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.15.mlp.switch_mlp.gate_proj.weight": "model-00020-of-00135.safetensors", + "model.layers.15.mlp.switch_mlp.up_proj.weight": "model-00020-of-00135.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00021-of-00135.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00019-of-00135.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00019-of-00135.safetensors", + "model.layers.16.input_layernorm.weight": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.gate.e_score_correction_bias": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.gate.weight": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.switch_mlp.down_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.16.mlp.switch_mlp.gate_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.16.mlp.switch_mlp.up_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00022-of-00135.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00021-of-00135.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00021-of-00135.safetensors", + "model.layers.17.input_layernorm.weight": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.gate.e_score_correction_bias": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.gate.weight": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.switch_mlp.down_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.17.mlp.switch_mlp.gate_proj.weight": "model-00023-of-00135.safetensors", + "model.layers.17.mlp.switch_mlp.up_proj.weight": "model-00023-of-00135.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00024-of-00135.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00022-of-00135.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00022-of-00135.safetensors", + "model.layers.18.input_layernorm.weight": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.gate.e_score_correction_bias": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.gate.weight": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.switch_mlp.down_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.18.mlp.switch_mlp.gate_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.18.mlp.switch_mlp.up_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00025-of-00135.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00024-of-00135.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00024-of-00135.safetensors", + "model.layers.19.input_layernorm.weight": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.gate.e_score_correction_bias": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.gate.weight": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.switch_mlp.down_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.19.mlp.switch_mlp.gate_proj.weight": "model-00026-of-00135.safetensors", + "model.layers.19.mlp.switch_mlp.up_proj.weight": "model-00026-of-00135.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00027-of-00135.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00025-of-00135.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00025-of-00135.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00135.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.20.input_layernorm.weight": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.gate.e_score_correction_bias": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.gate.weight": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.switch_mlp.down_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.20.mlp.switch_mlp.gate_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.20.mlp.switch_mlp.up_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00028-of-00135.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00027-of-00135.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00027-of-00135.safetensors", + "model.layers.21.input_layernorm.weight": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.gate.e_score_correction_bias": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.gate.weight": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.switch_mlp.down_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.21.mlp.switch_mlp.gate_proj.weight": "model-00029-of-00135.safetensors", + "model.layers.21.mlp.switch_mlp.up_proj.weight": "model-00029-of-00135.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00030-of-00135.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00028-of-00135.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00028-of-00135.safetensors", + "model.layers.22.input_layernorm.weight": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.gate.e_score_correction_bias": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.gate.weight": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.switch_mlp.down_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.22.mlp.switch_mlp.gate_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.22.mlp.switch_mlp.up_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00031-of-00135.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00030-of-00135.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00030-of-00135.safetensors", + "model.layers.23.input_layernorm.weight": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.gate.e_score_correction_bias": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.gate.weight": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.switch_mlp.down_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.23.mlp.switch_mlp.gate_proj.weight": "model-00032-of-00135.safetensors", + "model.layers.23.mlp.switch_mlp.up_proj.weight": "model-00032-of-00135.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00033-of-00135.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00031-of-00135.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00031-of-00135.safetensors", + "model.layers.24.input_layernorm.weight": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.gate.e_score_correction_bias": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.gate.weight": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.switch_mlp.down_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.24.mlp.switch_mlp.gate_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.24.mlp.switch_mlp.up_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00034-of-00135.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00033-of-00135.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00033-of-00135.safetensors", + "model.layers.25.input_layernorm.weight": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.gate.e_score_correction_bias": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.gate.weight": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.switch_mlp.down_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.25.mlp.switch_mlp.gate_proj.weight": "model-00035-of-00135.safetensors", + "model.layers.25.mlp.switch_mlp.up_proj.weight": "model-00035-of-00135.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00036-of-00135.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00034-of-00135.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00034-of-00135.safetensors", + "model.layers.26.input_layernorm.weight": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.gate.e_score_correction_bias": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.gate.weight": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.switch_mlp.down_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.26.mlp.switch_mlp.gate_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00037-of-00135.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00036-of-00135.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00036-of-00135.safetensors", + "model.layers.27.input_layernorm.weight": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.gate.e_score_correction_bias": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.gate.weight": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.switch_mlp.down_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.27.mlp.switch_mlp.gate_proj.weight": "model-00038-of-00135.safetensors", + "model.layers.27.mlp.switch_mlp.up_proj.weight": "model-00038-of-00135.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00039-of-00135.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00037-of-00135.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00037-of-00135.safetensors", + "model.layers.28.input_layernorm.weight": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.gate.e_score_correction_bias": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.gate.weight": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.switch_mlp.down_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.28.mlp.switch_mlp.gate_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.28.mlp.switch_mlp.up_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00040-of-00135.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00039-of-00135.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00039-of-00135.safetensors", + "model.layers.29.input_layernorm.weight": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.gate.e_score_correction_bias": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.gate.weight": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.switch_mlp.down_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.29.mlp.switch_mlp.gate_proj.weight": "model-00041-of-00135.safetensors", + "model.layers.29.mlp.switch_mlp.up_proj.weight": "model-00041-of-00135.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00042-of-00135.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00040-of-00135.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00040-of-00135.safetensors", + "model.layers.3.input_layernorm.weight": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.gate.e_score_correction_bias": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.gate.weight": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.switch_mlp.down_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.3.mlp.switch_mlp.gate_proj.weight": "model-00002-of-00135.safetensors", + "model.layers.3.mlp.switch_mlp.up_proj.weight": "model-00002-of-00135.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00135.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00135.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00135.safetensors", + "model.layers.30.input_layernorm.weight": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.gate.e_score_correction_bias": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.gate.weight": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.switch_mlp.down_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.30.mlp.switch_mlp.gate_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.30.mlp.switch_mlp.up_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00043-of-00135.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00042-of-00135.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00042-of-00135.safetensors", + "model.layers.31.input_layernorm.weight": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.gate.e_score_correction_bias": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.gate.weight": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.switch_mlp.down_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.31.mlp.switch_mlp.gate_proj.weight": "model-00044-of-00135.safetensors", + "model.layers.31.mlp.switch_mlp.up_proj.weight": "model-00044-of-00135.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00045-of-00135.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00043-of-00135.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00043-of-00135.safetensors", + "model.layers.32.input_layernorm.weight": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.gate.e_score_correction_bias": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.gate.weight": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.switch_mlp.down_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.32.mlp.switch_mlp.gate_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.32.mlp.switch_mlp.up_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00046-of-00135.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00045-of-00135.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00045-of-00135.safetensors", + "model.layers.33.input_layernorm.weight": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.gate.e_score_correction_bias": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.gate.weight": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.switch_mlp.down_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.33.mlp.switch_mlp.gate_proj.weight": "model-00047-of-00135.safetensors", + "model.layers.33.mlp.switch_mlp.up_proj.weight": "model-00047-of-00135.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00048-of-00135.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00046-of-00135.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00046-of-00135.safetensors", + "model.layers.34.input_layernorm.weight": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.gate.e_score_correction_bias": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.gate.weight": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.switch_mlp.down_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.34.mlp.switch_mlp.gate_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.34.mlp.switch_mlp.up_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00049-of-00135.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00048-of-00135.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00048-of-00135.safetensors", + "model.layers.35.input_layernorm.weight": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.gate.e_score_correction_bias": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.gate.weight": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.switch_mlp.down_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.35.mlp.switch_mlp.gate_proj.weight": "model-00050-of-00135.safetensors", + "model.layers.35.mlp.switch_mlp.up_proj.weight": "model-00050-of-00135.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00051-of-00135.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00049-of-00135.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00049-of-00135.safetensors", + "model.layers.36.input_layernorm.weight": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.gate.e_score_correction_bias": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.gate.weight": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.switch_mlp.down_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.36.mlp.switch_mlp.gate_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.36.mlp.switch_mlp.up_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00052-of-00135.safetensors", + "model.layers.36.self_attn.k_norm.weight": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.k_proj.bias": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.q_norm.weight": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.q_proj.bias": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.v_proj.bias": "model-00051-of-00135.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00051-of-00135.safetensors", + "model.layers.37.input_layernorm.weight": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.gate.e_score_correction_bias": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.gate.weight": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.switch_mlp.down_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.37.mlp.switch_mlp.gate_proj.weight": "model-00053-of-00135.safetensors", + "model.layers.37.mlp.switch_mlp.up_proj.weight": "model-00053-of-00135.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00054-of-00135.safetensors", + "model.layers.37.self_attn.k_norm.weight": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.k_proj.bias": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.q_norm.weight": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.q_proj.bias": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.v_proj.bias": "model-00052-of-00135.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00052-of-00135.safetensors", + "model.layers.38.input_layernorm.weight": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.gate.e_score_correction_bias": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.gate.weight": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.switch_mlp.down_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.38.mlp.switch_mlp.gate_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.38.mlp.switch_mlp.up_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00055-of-00135.safetensors", + "model.layers.38.self_attn.k_norm.weight": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.k_proj.bias": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.q_norm.weight": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.q_proj.bias": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.v_proj.bias": "model-00054-of-00135.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00054-of-00135.safetensors", + "model.layers.39.input_layernorm.weight": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.gate.e_score_correction_bias": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.gate.weight": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.switch_mlp.down_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.39.mlp.switch_mlp.gate_proj.weight": "model-00056-of-00135.safetensors", + "model.layers.39.mlp.switch_mlp.up_proj.weight": "model-00056-of-00135.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00057-of-00135.safetensors", + "model.layers.39.self_attn.k_norm.weight": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.k_proj.bias": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.q_norm.weight": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.q_proj.bias": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.v_proj.bias": "model-00055-of-00135.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00055-of-00135.safetensors", + "model.layers.4.input_layernorm.weight": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.gate.e_score_correction_bias": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.gate.weight": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.switch_mlp.down_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.4.mlp.switch_mlp.gate_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.4.mlp.switch_mlp.up_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00135.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00003-of-00135.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00003-of-00135.safetensors", + "model.layers.40.input_layernorm.weight": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.gate.e_score_correction_bias": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.gate.weight": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.switch_mlp.down_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.40.mlp.switch_mlp.gate_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.40.mlp.switch_mlp.up_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00058-of-00135.safetensors", + "model.layers.40.self_attn.k_norm.weight": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.k_proj.bias": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.q_norm.weight": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.q_proj.bias": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.v_proj.bias": "model-00057-of-00135.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00057-of-00135.safetensors", + "model.layers.41.input_layernorm.weight": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.gate.e_score_correction_bias": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.gate.weight": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.switch_mlp.down_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.41.mlp.switch_mlp.gate_proj.weight": "model-00059-of-00135.safetensors", + "model.layers.41.mlp.switch_mlp.up_proj.weight": "model-00059-of-00135.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00060-of-00135.safetensors", + "model.layers.41.self_attn.k_norm.weight": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.k_proj.bias": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.q_norm.weight": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.q_proj.bias": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.v_proj.bias": "model-00058-of-00135.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00058-of-00135.safetensors", + "model.layers.42.input_layernorm.weight": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.gate.e_score_correction_bias": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.gate.weight": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.switch_mlp.down_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.42.mlp.switch_mlp.gate_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.42.mlp.switch_mlp.up_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00061-of-00135.safetensors", + "model.layers.42.self_attn.k_norm.weight": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.k_proj.bias": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.q_norm.weight": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.q_proj.bias": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.v_proj.bias": "model-00060-of-00135.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00060-of-00135.safetensors", + "model.layers.43.input_layernorm.weight": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.gate.e_score_correction_bias": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.gate.weight": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.switch_mlp.down_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.43.mlp.switch_mlp.gate_proj.weight": "model-00062-of-00135.safetensors", + "model.layers.43.mlp.switch_mlp.up_proj.weight": "model-00062-of-00135.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00063-of-00135.safetensors", + "model.layers.43.self_attn.k_norm.weight": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.k_proj.bias": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.q_norm.weight": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.q_proj.bias": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.v_proj.bias": "model-00061-of-00135.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00061-of-00135.safetensors", + "model.layers.44.input_layernorm.weight": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.gate.e_score_correction_bias": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.gate.weight": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.switch_mlp.down_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.44.mlp.switch_mlp.gate_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.44.mlp.switch_mlp.up_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00064-of-00135.safetensors", + "model.layers.44.self_attn.k_norm.weight": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.k_proj.bias": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.q_norm.weight": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.q_proj.bias": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.v_proj.bias": "model-00063-of-00135.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00063-of-00135.safetensors", + "model.layers.45.input_layernorm.weight": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.gate.e_score_correction_bias": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.gate.weight": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.switch_mlp.down_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.45.mlp.switch_mlp.gate_proj.weight": "model-00065-of-00135.safetensors", + "model.layers.45.mlp.switch_mlp.up_proj.weight": "model-00065-of-00135.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00066-of-00135.safetensors", + "model.layers.45.self_attn.k_norm.weight": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.k_proj.bias": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.q_norm.weight": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.q_proj.bias": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.v_proj.bias": "model-00064-of-00135.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00064-of-00135.safetensors", + "model.layers.46.input_layernorm.weight": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.gate.e_score_correction_bias": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.gate.weight": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.switch_mlp.down_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.46.mlp.switch_mlp.gate_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.46.mlp.switch_mlp.up_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00067-of-00135.safetensors", + "model.layers.46.self_attn.k_norm.weight": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.k_proj.bias": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.q_norm.weight": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.q_proj.bias": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.v_proj.bias": "model-00066-of-00135.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00066-of-00135.safetensors", + "model.layers.47.input_layernorm.weight": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.gate.e_score_correction_bias": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.gate.weight": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.switch_mlp.down_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.47.mlp.switch_mlp.gate_proj.weight": "model-00068-of-00135.safetensors", + "model.layers.47.mlp.switch_mlp.up_proj.weight": "model-00068-of-00135.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00069-of-00135.safetensors", + "model.layers.47.self_attn.k_norm.weight": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.k_proj.bias": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.q_norm.weight": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.q_proj.bias": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.v_proj.bias": "model-00067-of-00135.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00067-of-00135.safetensors", + "model.layers.48.input_layernorm.weight": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.gate.e_score_correction_bias": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.gate.weight": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.shared_experts.down_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.shared_experts.gate_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.shared_experts.up_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.switch_mlp.down_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.48.mlp.switch_mlp.gate_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.48.mlp.switch_mlp.up_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00070-of-00135.safetensors", + "model.layers.48.self_attn.k_norm.weight": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.k_proj.bias": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.q_norm.weight": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.q_proj.bias": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.v_proj.bias": "model-00069-of-00135.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00069-of-00135.safetensors", + "model.layers.49.input_layernorm.weight": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.gate.e_score_correction_bias": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.gate.weight": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.shared_experts.down_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.shared_experts.gate_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.shared_experts.up_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.switch_mlp.down_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.49.mlp.switch_mlp.gate_proj.weight": "model-00071-of-00135.safetensors", + "model.layers.49.mlp.switch_mlp.up_proj.weight": "model-00071-of-00135.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00072-of-00135.safetensors", + "model.layers.49.self_attn.k_norm.weight": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.k_proj.bias": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.q_norm.weight": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.q_proj.bias": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.v_proj.bias": "model-00070-of-00135.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00070-of-00135.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.gate.e_score_correction_bias": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.gate.weight": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.switch_mlp.down_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.5.mlp.switch_mlp.gate_proj.weight": "model-00005-of-00135.safetensors", + "model.layers.5.mlp.switch_mlp.up_proj.weight": "model-00005-of-00135.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00135.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00004-of-00135.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00135.safetensors", + "model.layers.50.input_layernorm.weight": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.gate.e_score_correction_bias": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.gate.weight": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.shared_experts.down_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.shared_experts.gate_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.shared_experts.up_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.switch_mlp.down_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.50.mlp.switch_mlp.gate_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.50.mlp.switch_mlp.up_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00073-of-00135.safetensors", + "model.layers.50.self_attn.k_norm.weight": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.k_proj.bias": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.q_norm.weight": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.q_proj.bias": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.v_proj.bias": "model-00072-of-00135.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00072-of-00135.safetensors", + "model.layers.51.input_layernorm.weight": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.gate.e_score_correction_bias": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.gate.weight": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.shared_experts.down_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.shared_experts.gate_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.shared_experts.up_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.switch_mlp.down_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.51.mlp.switch_mlp.gate_proj.weight": "model-00074-of-00135.safetensors", + "model.layers.51.mlp.switch_mlp.up_proj.weight": "model-00074-of-00135.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00075-of-00135.safetensors", + "model.layers.51.self_attn.k_norm.weight": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.k_proj.bias": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.q_norm.weight": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.q_proj.bias": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.v_proj.bias": "model-00073-of-00135.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00073-of-00135.safetensors", + "model.layers.52.input_layernorm.weight": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.gate.e_score_correction_bias": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.gate.weight": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.shared_experts.down_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.shared_experts.gate_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.shared_experts.up_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.switch_mlp.down_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.52.mlp.switch_mlp.gate_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.52.mlp.switch_mlp.up_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00076-of-00135.safetensors", + "model.layers.52.self_attn.k_norm.weight": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.k_proj.bias": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.q_norm.weight": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.q_proj.bias": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.v_proj.bias": "model-00075-of-00135.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00075-of-00135.safetensors", + "model.layers.53.input_layernorm.weight": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.gate.e_score_correction_bias": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.gate.weight": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.shared_experts.down_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.shared_experts.gate_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.shared_experts.up_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.switch_mlp.down_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.53.mlp.switch_mlp.gate_proj.weight": "model-00077-of-00135.safetensors", + "model.layers.53.mlp.switch_mlp.up_proj.weight": "model-00077-of-00135.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00078-of-00135.safetensors", + "model.layers.53.self_attn.k_norm.weight": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.k_proj.bias": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.q_norm.weight": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.q_proj.bias": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.v_proj.bias": "model-00076-of-00135.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00076-of-00135.safetensors", + "model.layers.54.input_layernorm.weight": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.gate.e_score_correction_bias": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.gate.weight": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.shared_experts.down_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.shared_experts.gate_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.shared_experts.up_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.switch_mlp.down_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.54.mlp.switch_mlp.gate_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.54.mlp.switch_mlp.up_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00079-of-00135.safetensors", + "model.layers.54.self_attn.k_norm.weight": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.k_proj.bias": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.q_norm.weight": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.q_proj.bias": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.v_proj.bias": "model-00078-of-00135.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00078-of-00135.safetensors", + "model.layers.55.input_layernorm.weight": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.gate.e_score_correction_bias": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.gate.weight": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.shared_experts.down_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.shared_experts.gate_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.shared_experts.up_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.switch_mlp.down_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.55.mlp.switch_mlp.gate_proj.weight": "model-00080-of-00135.safetensors", + "model.layers.55.mlp.switch_mlp.up_proj.weight": "model-00080-of-00135.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00081-of-00135.safetensors", + "model.layers.55.self_attn.k_norm.weight": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.k_proj.bias": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.q_norm.weight": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.q_proj.bias": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.v_proj.bias": "model-00079-of-00135.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00079-of-00135.safetensors", + "model.layers.56.input_layernorm.weight": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.gate.e_score_correction_bias": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.gate.weight": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.shared_experts.down_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.shared_experts.gate_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.shared_experts.up_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.switch_mlp.down_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.56.mlp.switch_mlp.gate_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.56.mlp.switch_mlp.up_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00082-of-00135.safetensors", + "model.layers.56.self_attn.k_norm.weight": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.k_proj.bias": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.q_norm.weight": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.q_proj.bias": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.v_proj.bias": "model-00081-of-00135.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00081-of-00135.safetensors", + "model.layers.57.input_layernorm.weight": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.gate.e_score_correction_bias": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.gate.weight": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.shared_experts.down_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.shared_experts.gate_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.shared_experts.up_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.switch_mlp.down_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.57.mlp.switch_mlp.gate_proj.weight": "model-00083-of-00135.safetensors", + "model.layers.57.mlp.switch_mlp.up_proj.weight": "model-00083-of-00135.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00084-of-00135.safetensors", + "model.layers.57.self_attn.k_norm.weight": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.k_proj.bias": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.q_norm.weight": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.q_proj.bias": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.v_proj.bias": "model-00082-of-00135.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00082-of-00135.safetensors", + "model.layers.58.input_layernorm.weight": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.gate.e_score_correction_bias": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.gate.weight": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.shared_experts.down_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.shared_experts.gate_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.shared_experts.up_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.switch_mlp.down_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.58.mlp.switch_mlp.gate_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.58.mlp.switch_mlp.up_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00085-of-00135.safetensors", + "model.layers.58.self_attn.k_norm.weight": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.k_proj.bias": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.q_norm.weight": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.q_proj.bias": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.v_proj.bias": "model-00084-of-00135.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00084-of-00135.safetensors", + "model.layers.59.input_layernorm.weight": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.gate.e_score_correction_bias": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.gate.weight": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.shared_experts.down_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.shared_experts.gate_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.shared_experts.up_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.switch_mlp.down_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.59.mlp.switch_mlp.gate_proj.weight": "model-00086-of-00135.safetensors", + "model.layers.59.mlp.switch_mlp.up_proj.weight": "model-00086-of-00135.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00087-of-00135.safetensors", + "model.layers.59.self_attn.k_norm.weight": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.k_proj.bias": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.q_norm.weight": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.q_proj.bias": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.v_proj.bias": "model-00085-of-00135.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00085-of-00135.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.gate.e_score_correction_bias": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.gate.weight": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.switch_mlp.down_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.6.mlp.switch_mlp.gate_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.6.mlp.switch_mlp.up_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00135.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00006-of-00135.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00135.safetensors", + "model.layers.60.input_layernorm.weight": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.gate.e_score_correction_bias": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.gate.weight": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.shared_experts.down_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.shared_experts.gate_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.shared_experts.up_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.switch_mlp.down_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.60.mlp.switch_mlp.gate_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.60.mlp.switch_mlp.up_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00088-of-00135.safetensors", + "model.layers.60.self_attn.k_norm.weight": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.k_proj.bias": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.q_norm.weight": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.q_proj.bias": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.v_proj.bias": "model-00087-of-00135.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00087-of-00135.safetensors", + "model.layers.61.input_layernorm.weight": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.gate.e_score_correction_bias": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.gate.weight": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.shared_experts.down_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.shared_experts.gate_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.shared_experts.up_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.switch_mlp.down_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.61.mlp.switch_mlp.gate_proj.weight": "model-00089-of-00135.safetensors", + "model.layers.61.mlp.switch_mlp.up_proj.weight": "model-00089-of-00135.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00090-of-00135.safetensors", + "model.layers.61.self_attn.k_norm.weight": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.k_proj.bias": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.q_norm.weight": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.q_proj.bias": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.v_proj.bias": "model-00088-of-00135.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00088-of-00135.safetensors", + "model.layers.62.input_layernorm.weight": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.gate.e_score_correction_bias": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.gate.weight": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.shared_experts.down_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.shared_experts.gate_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.shared_experts.up_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.switch_mlp.down_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.62.mlp.switch_mlp.gate_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.62.mlp.switch_mlp.up_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00091-of-00135.safetensors", + "model.layers.62.self_attn.k_norm.weight": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.k_proj.bias": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.q_norm.weight": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.q_proj.bias": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.v_proj.bias": "model-00090-of-00135.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00090-of-00135.safetensors", + "model.layers.63.input_layernorm.weight": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.gate.e_score_correction_bias": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.gate.weight": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.shared_experts.down_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.shared_experts.gate_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.shared_experts.up_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.switch_mlp.down_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.63.mlp.switch_mlp.gate_proj.weight": "model-00092-of-00135.safetensors", + "model.layers.63.mlp.switch_mlp.up_proj.weight": "model-00092-of-00135.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00093-of-00135.safetensors", + "model.layers.63.self_attn.k_norm.weight": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.k_proj.bias": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.q_norm.weight": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.q_proj.bias": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.v_proj.bias": "model-00091-of-00135.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00091-of-00135.safetensors", + "model.layers.64.input_layernorm.weight": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.gate.e_score_correction_bias": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.gate.weight": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.shared_experts.down_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.shared_experts.gate_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.shared_experts.up_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.switch_mlp.down_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.64.mlp.switch_mlp.gate_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.64.mlp.switch_mlp.up_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00094-of-00135.safetensors", + "model.layers.64.self_attn.k_norm.weight": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.k_proj.bias": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.q_norm.weight": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.q_proj.bias": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.v_proj.bias": "model-00093-of-00135.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00093-of-00135.safetensors", + "model.layers.65.input_layernorm.weight": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.gate.e_score_correction_bias": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.gate.weight": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.shared_experts.down_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.shared_experts.gate_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.shared_experts.up_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.switch_mlp.down_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.65.mlp.switch_mlp.gate_proj.weight": "model-00095-of-00135.safetensors", + "model.layers.65.mlp.switch_mlp.up_proj.weight": "model-00095-of-00135.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00096-of-00135.safetensors", + "model.layers.65.self_attn.k_norm.weight": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.k_proj.bias": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.q_norm.weight": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.q_proj.bias": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.v_proj.bias": "model-00094-of-00135.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00094-of-00135.safetensors", + "model.layers.66.input_layernorm.weight": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.gate.e_score_correction_bias": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.gate.weight": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.shared_experts.down_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.shared_experts.gate_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.shared_experts.up_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.switch_mlp.down_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.66.mlp.switch_mlp.gate_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.66.mlp.switch_mlp.up_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00097-of-00135.safetensors", + "model.layers.66.self_attn.k_norm.weight": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.k_proj.bias": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.q_norm.weight": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.q_proj.bias": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.v_proj.bias": "model-00096-of-00135.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00096-of-00135.safetensors", + "model.layers.67.input_layernorm.weight": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.gate.e_score_correction_bias": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.gate.weight": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.shared_experts.down_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.shared_experts.gate_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.shared_experts.up_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.switch_mlp.down_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.67.mlp.switch_mlp.gate_proj.weight": "model-00098-of-00135.safetensors", + "model.layers.67.mlp.switch_mlp.up_proj.weight": "model-00098-of-00135.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00099-of-00135.safetensors", + "model.layers.67.self_attn.k_norm.weight": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.k_proj.bias": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.q_norm.weight": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.q_proj.bias": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.v_proj.bias": "model-00097-of-00135.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00097-of-00135.safetensors", + "model.layers.68.input_layernorm.weight": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.gate.e_score_correction_bias": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.gate.weight": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.shared_experts.down_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.shared_experts.gate_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.shared_experts.up_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.switch_mlp.down_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.68.mlp.switch_mlp.gate_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.68.mlp.switch_mlp.up_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00100-of-00135.safetensors", + "model.layers.68.self_attn.k_norm.weight": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.k_proj.bias": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.q_norm.weight": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.q_proj.bias": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.v_proj.bias": "model-00099-of-00135.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00099-of-00135.safetensors", + "model.layers.69.input_layernorm.weight": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.gate.e_score_correction_bias": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.gate.weight": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.shared_experts.down_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.shared_experts.gate_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.shared_experts.up_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.switch_mlp.down_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.69.mlp.switch_mlp.gate_proj.weight": "model-00101-of-00135.safetensors", + "model.layers.69.mlp.switch_mlp.up_proj.weight": "model-00101-of-00135.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00102-of-00135.safetensors", + "model.layers.69.self_attn.k_norm.weight": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.k_proj.bias": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.q_norm.weight": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.q_proj.bias": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.v_proj.bias": "model-00100-of-00135.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00100-of-00135.safetensors", + "model.layers.7.input_layernorm.weight": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.gate.e_score_correction_bias": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.gate.weight": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.switch_mlp.down_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.7.mlp.switch_mlp.gate_proj.weight": "model-00008-of-00135.safetensors", + "model.layers.7.mlp.switch_mlp.up_proj.weight": "model-00008-of-00135.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00135.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00007-of-00135.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00135.safetensors", + "model.layers.70.input_layernorm.weight": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.gate.e_score_correction_bias": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.gate.weight": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.shared_experts.down_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.shared_experts.gate_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.shared_experts.up_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.switch_mlp.down_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.70.mlp.switch_mlp.gate_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.70.mlp.switch_mlp.up_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00103-of-00135.safetensors", + "model.layers.70.self_attn.k_norm.weight": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.k_proj.bias": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.q_norm.weight": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.q_proj.bias": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.v_proj.bias": "model-00102-of-00135.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00102-of-00135.safetensors", + "model.layers.71.input_layernorm.weight": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.gate.e_score_correction_bias": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.gate.weight": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.shared_experts.down_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.shared_experts.gate_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.shared_experts.up_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.switch_mlp.down_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.71.mlp.switch_mlp.gate_proj.weight": "model-00104-of-00135.safetensors", + "model.layers.71.mlp.switch_mlp.up_proj.weight": "model-00104-of-00135.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00105-of-00135.safetensors", + "model.layers.71.self_attn.k_norm.weight": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.k_proj.bias": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.q_norm.weight": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.q_proj.bias": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.v_proj.bias": "model-00103-of-00135.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00103-of-00135.safetensors", + "model.layers.72.input_layernorm.weight": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.gate.e_score_correction_bias": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.gate.weight": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.shared_experts.down_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.shared_experts.gate_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.shared_experts.up_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.switch_mlp.down_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.72.mlp.switch_mlp.gate_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.72.mlp.switch_mlp.up_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00106-of-00135.safetensors", + "model.layers.72.self_attn.k_norm.weight": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.k_proj.bias": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.q_norm.weight": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.q_proj.bias": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.v_proj.bias": "model-00105-of-00135.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00105-of-00135.safetensors", + "model.layers.73.input_layernorm.weight": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.gate.e_score_correction_bias": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.gate.weight": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.shared_experts.down_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.shared_experts.gate_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.shared_experts.up_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.switch_mlp.down_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.73.mlp.switch_mlp.gate_proj.weight": "model-00107-of-00135.safetensors", + "model.layers.73.mlp.switch_mlp.up_proj.weight": "model-00107-of-00135.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00108-of-00135.safetensors", + "model.layers.73.self_attn.k_norm.weight": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.k_proj.bias": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.q_norm.weight": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.q_proj.bias": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.v_proj.bias": "model-00106-of-00135.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00106-of-00135.safetensors", + "model.layers.74.input_layernorm.weight": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.gate.e_score_correction_bias": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.gate.weight": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.shared_experts.down_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.shared_experts.gate_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.shared_experts.up_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.switch_mlp.down_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.74.mlp.switch_mlp.gate_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.74.mlp.switch_mlp.up_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00109-of-00135.safetensors", + "model.layers.74.self_attn.k_norm.weight": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.k_proj.bias": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.q_norm.weight": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.q_proj.bias": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.v_proj.bias": "model-00108-of-00135.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00108-of-00135.safetensors", + "model.layers.75.input_layernorm.weight": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.gate.e_score_correction_bias": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.gate.weight": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.shared_experts.down_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.shared_experts.gate_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.shared_experts.up_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.switch_mlp.down_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.75.mlp.switch_mlp.gate_proj.weight": "model-00110-of-00135.safetensors", + "model.layers.75.mlp.switch_mlp.up_proj.weight": "model-00110-of-00135.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00111-of-00135.safetensors", + "model.layers.75.self_attn.k_norm.weight": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.k_proj.bias": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.q_norm.weight": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.q_proj.bias": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.v_proj.bias": "model-00109-of-00135.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00109-of-00135.safetensors", + "model.layers.76.input_layernorm.weight": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.gate.e_score_correction_bias": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.gate.weight": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.shared_experts.down_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.shared_experts.gate_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.shared_experts.up_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.switch_mlp.down_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.76.mlp.switch_mlp.gate_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.76.mlp.switch_mlp.up_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00112-of-00135.safetensors", + "model.layers.76.self_attn.k_norm.weight": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.k_proj.bias": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.q_norm.weight": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.q_proj.bias": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.v_proj.bias": "model-00111-of-00135.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00111-of-00135.safetensors", + "model.layers.77.input_layernorm.weight": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.gate.e_score_correction_bias": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.gate.weight": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.shared_experts.down_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.shared_experts.gate_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.shared_experts.up_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.switch_mlp.down_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.77.mlp.switch_mlp.gate_proj.weight": "model-00113-of-00135.safetensors", + "model.layers.77.mlp.switch_mlp.up_proj.weight": "model-00113-of-00135.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00114-of-00135.safetensors", + "model.layers.77.self_attn.k_norm.weight": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.k_proj.bias": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.q_norm.weight": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.q_proj.bias": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.v_proj.bias": "model-00112-of-00135.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00112-of-00135.safetensors", + "model.layers.78.input_layernorm.weight": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.gate.e_score_correction_bias": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.gate.weight": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.shared_experts.down_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.shared_experts.gate_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.shared_experts.up_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.switch_mlp.down_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.78.mlp.switch_mlp.gate_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.78.mlp.switch_mlp.up_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00115-of-00135.safetensors", + "model.layers.78.self_attn.k_norm.weight": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.k_proj.bias": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.q_norm.weight": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.q_proj.bias": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.v_proj.bias": "model-00114-of-00135.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00114-of-00135.safetensors", + "model.layers.79.input_layernorm.weight": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.gate.e_score_correction_bias": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.gate.weight": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.shared_experts.down_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.shared_experts.gate_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.shared_experts.up_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.switch_mlp.down_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.79.mlp.switch_mlp.gate_proj.weight": "model-00116-of-00135.safetensors", + "model.layers.79.mlp.switch_mlp.up_proj.weight": "model-00116-of-00135.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00117-of-00135.safetensors", + "model.layers.79.self_attn.k_norm.weight": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.k_proj.bias": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.q_norm.weight": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.q_proj.bias": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.v_proj.bias": "model-00115-of-00135.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00115-of-00135.safetensors", + "model.layers.8.input_layernorm.weight": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.gate.e_score_correction_bias": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.gate.weight": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.switch_mlp.down_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.8.mlp.switch_mlp.gate_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.8.mlp.switch_mlp.up_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00135.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00009-of-00135.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00135.safetensors", + "model.layers.80.input_layernorm.weight": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.gate.e_score_correction_bias": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.gate.weight": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.shared_experts.down_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.shared_experts.gate_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.shared_experts.up_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.switch_mlp.down_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.80.mlp.switch_mlp.gate_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.80.mlp.switch_mlp.up_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.80.post_attention_layernorm.weight": "model-00118-of-00135.safetensors", + "model.layers.80.self_attn.k_norm.weight": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.k_proj.bias": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.k_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.o_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.q_norm.weight": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.q_proj.bias": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.q_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.v_proj.bias": "model-00117-of-00135.safetensors", + "model.layers.80.self_attn.v_proj.weight": "model-00117-of-00135.safetensors", + "model.layers.81.input_layernorm.weight": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.gate.e_score_correction_bias": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.gate.weight": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.shared_experts.down_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.shared_experts.gate_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.shared_experts.up_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.switch_mlp.down_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.81.mlp.switch_mlp.gate_proj.weight": "model-00119-of-00135.safetensors", + "model.layers.81.mlp.switch_mlp.up_proj.weight": "model-00119-of-00135.safetensors", + "model.layers.81.post_attention_layernorm.weight": "model-00120-of-00135.safetensors", + "model.layers.81.self_attn.k_norm.weight": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.k_proj.bias": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.k_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.o_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.q_norm.weight": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.q_proj.bias": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.q_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.v_proj.bias": "model-00118-of-00135.safetensors", + "model.layers.81.self_attn.v_proj.weight": "model-00118-of-00135.safetensors", + "model.layers.82.input_layernorm.weight": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.gate.e_score_correction_bias": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.gate.weight": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.shared_experts.down_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.shared_experts.gate_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.shared_experts.up_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.switch_mlp.down_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.82.mlp.switch_mlp.gate_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.82.mlp.switch_mlp.up_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.82.post_attention_layernorm.weight": "model-00121-of-00135.safetensors", + "model.layers.82.self_attn.k_norm.weight": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.k_proj.bias": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.k_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.o_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.q_norm.weight": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.q_proj.bias": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.q_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.v_proj.bias": "model-00120-of-00135.safetensors", + "model.layers.82.self_attn.v_proj.weight": "model-00120-of-00135.safetensors", + "model.layers.83.input_layernorm.weight": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.gate.e_score_correction_bias": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.gate.weight": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.shared_experts.down_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.shared_experts.gate_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.shared_experts.up_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.switch_mlp.down_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.83.mlp.switch_mlp.gate_proj.weight": "model-00122-of-00135.safetensors", + "model.layers.83.mlp.switch_mlp.up_proj.weight": "model-00122-of-00135.safetensors", + "model.layers.83.post_attention_layernorm.weight": "model-00123-of-00135.safetensors", + "model.layers.83.self_attn.k_norm.weight": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.k_proj.bias": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.k_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.o_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.q_norm.weight": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.q_proj.bias": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.q_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.v_proj.bias": "model-00121-of-00135.safetensors", + "model.layers.83.self_attn.v_proj.weight": "model-00121-of-00135.safetensors", + "model.layers.84.input_layernorm.weight": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.gate.e_score_correction_bias": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.gate.weight": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.shared_experts.down_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.shared_experts.gate_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.shared_experts.up_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.switch_mlp.down_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.84.mlp.switch_mlp.gate_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.84.mlp.switch_mlp.up_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.84.post_attention_layernorm.weight": "model-00124-of-00135.safetensors", + "model.layers.84.self_attn.k_norm.weight": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.k_proj.bias": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.k_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.o_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.q_norm.weight": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.q_proj.bias": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.q_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.v_proj.bias": "model-00123-of-00135.safetensors", + "model.layers.84.self_attn.v_proj.weight": "model-00123-of-00135.safetensors", + "model.layers.85.input_layernorm.weight": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.gate.e_score_correction_bias": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.gate.weight": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.shared_experts.down_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.shared_experts.gate_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.shared_experts.up_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.switch_mlp.down_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.85.mlp.switch_mlp.gate_proj.weight": "model-00125-of-00135.safetensors", + "model.layers.85.mlp.switch_mlp.up_proj.weight": "model-00125-of-00135.safetensors", + "model.layers.85.post_attention_layernorm.weight": "model-00126-of-00135.safetensors", + "model.layers.85.self_attn.k_norm.weight": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.k_proj.bias": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.k_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.o_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.q_norm.weight": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.q_proj.bias": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.q_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.v_proj.bias": "model-00124-of-00135.safetensors", + "model.layers.85.self_attn.v_proj.weight": "model-00124-of-00135.safetensors", + "model.layers.86.input_layernorm.weight": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.gate.e_score_correction_bias": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.gate.weight": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.shared_experts.down_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.shared_experts.gate_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.shared_experts.up_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.switch_mlp.down_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.86.mlp.switch_mlp.gate_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.86.mlp.switch_mlp.up_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.86.post_attention_layernorm.weight": "model-00127-of-00135.safetensors", + "model.layers.86.self_attn.k_norm.weight": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.k_proj.bias": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.k_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.o_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.q_norm.weight": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.q_proj.bias": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.q_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.v_proj.bias": "model-00126-of-00135.safetensors", + "model.layers.86.self_attn.v_proj.weight": "model-00126-of-00135.safetensors", + "model.layers.87.input_layernorm.weight": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.gate.e_score_correction_bias": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.gate.weight": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.shared_experts.down_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.shared_experts.gate_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.shared_experts.up_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.switch_mlp.down_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.87.mlp.switch_mlp.gate_proj.weight": "model-00128-of-00135.safetensors", + "model.layers.87.mlp.switch_mlp.up_proj.weight": "model-00128-of-00135.safetensors", + "model.layers.87.post_attention_layernorm.weight": "model-00129-of-00135.safetensors", + "model.layers.87.self_attn.k_norm.weight": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.k_proj.bias": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.k_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.o_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.q_norm.weight": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.q_proj.bias": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.q_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.v_proj.bias": "model-00127-of-00135.safetensors", + "model.layers.87.self_attn.v_proj.weight": "model-00127-of-00135.safetensors", + "model.layers.88.input_layernorm.weight": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.gate.e_score_correction_bias": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.gate.weight": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.shared_experts.down_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.shared_experts.gate_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.shared_experts.up_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.switch_mlp.down_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.88.mlp.switch_mlp.gate_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.88.mlp.switch_mlp.up_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.88.post_attention_layernorm.weight": "model-00130-of-00135.safetensors", + "model.layers.88.self_attn.k_norm.weight": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.k_proj.bias": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.k_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.o_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.q_norm.weight": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.q_proj.bias": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.q_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.v_proj.bias": "model-00129-of-00135.safetensors", + "model.layers.88.self_attn.v_proj.weight": "model-00129-of-00135.safetensors", + "model.layers.89.input_layernorm.weight": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.gate.e_score_correction_bias": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.gate.weight": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.shared_experts.down_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.shared_experts.gate_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.shared_experts.up_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.switch_mlp.down_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.89.mlp.switch_mlp.gate_proj.weight": "model-00131-of-00135.safetensors", + "model.layers.89.mlp.switch_mlp.up_proj.weight": "model-00131-of-00135.safetensors", + "model.layers.89.post_attention_layernorm.weight": "model-00132-of-00135.safetensors", + "model.layers.89.self_attn.k_norm.weight": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.k_proj.bias": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.k_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.o_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.q_norm.weight": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.q_proj.bias": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.q_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.v_proj.bias": "model-00130-of-00135.safetensors", + "model.layers.89.self_attn.v_proj.weight": "model-00130-of-00135.safetensors", + "model.layers.9.input_layernorm.weight": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.gate.e_score_correction_bias": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.gate.weight": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.switch_mlp.down_proj.weight": "model-00012-of-00135.safetensors", + "model.layers.9.mlp.switch_mlp.gate_proj.weight": "model-00011-of-00135.safetensors", + "model.layers.9.mlp.switch_mlp.up_proj.weight": "model-00011-of-00135.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00012-of-00135.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00010-of-00135.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00135.safetensors", + "model.layers.90.input_layernorm.weight": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.gate.e_score_correction_bias": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.gate.weight": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.shared_experts.down_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.shared_experts.gate_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.shared_experts.up_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.switch_mlp.down_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.90.mlp.switch_mlp.gate_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.90.mlp.switch_mlp.up_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.90.post_attention_layernorm.weight": "model-00133-of-00135.safetensors", + "model.layers.90.self_attn.k_norm.weight": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.k_proj.bias": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.k_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.o_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.q_norm.weight": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.q_proj.bias": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.q_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.v_proj.bias": "model-00132-of-00135.safetensors", + "model.layers.90.self_attn.v_proj.weight": "model-00132-of-00135.safetensors", + "model.layers.91.input_layernorm.weight": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.gate.e_score_correction_bias": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.gate.weight": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.shared_experts.down_proj.weight": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.shared_experts.gate_proj.weight": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.shared_experts.up_proj.weight": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.switch_mlp.down_proj.weight": "model-00135-of-00135.safetensors", + "model.layers.91.mlp.switch_mlp.gate_proj.weight": "model-00134-of-00135.safetensors", + "model.layers.91.mlp.switch_mlp.up_proj.weight": "model-00134-of-00135.safetensors", + "model.layers.91.post_attention_layernorm.weight": "model-00135-of-00135.safetensors", + "model.layers.91.self_attn.k_norm.weight": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.k_proj.bias": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.k_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.o_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.q_norm.weight": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.q_proj.bias": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.q_proj.weight": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.v_proj.bias": "model-00133-of-00135.safetensors", + "model.layers.91.self_attn.v_proj.weight": "model-00133-of-00135.safetensors", + "model.norm.weight": "model-00135-of-00135.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9028cf84013844f17d7616bdec1d88e977924434 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,40 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3ed3c66baf1ec4de61840b0abf02142687bfed8 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda8e2146c3bb7b7e0fc96dcc4f0aeff041c6c27952e3ace0665663ebff346ba +size 19970700 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..053f12c9b03d5acbcc921042ea0c87a6baa5d3f7 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,325 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151343": { + "content": "<|begin_of_audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151344": { + "content": "<|end_of_audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151345": { + "content": "<|begin_of_transcription|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151346": { + "content": "<|end_of_transcription|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151347": { + "content": "<|code_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151348": { + "content": "<|code_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151349": { + "content": "<|code_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151360": { + "content": "/nothink", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151361": { + "content": "<|begin_of_box|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151362": { + "content": "<|end_of_box|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151363": { + "content": "<|image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151364": { + "content": "<|video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": {}, + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizer" +}