diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..2ab98ef068d62829d17c5ade1827b9f013fa2bbf --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a56da60d33b6f4f403c62b149a4e52e8711d457 --- /dev/null +++ b/config.json @@ -0,0 +1,102 @@ +{ + "architectures": [ + "Glm4MoeLiteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "torch_dtype": "bfloat16", + "eos_token_id": 154820, + "first_k_dense_replace": 1, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 10240, + "kv_lora_rank": 512, + "max_position_embeddings": 202752, + "mlp_layer_types": [ + "dense", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse" + ], + "model_type": "glm4_moe_lite", + "moe_intermediate_size": 1536, + "n_group": 1, + "n_routed_experts": 64, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 20, + "num_experts_per_tok": 4, + "num_hidden_layers": 47, + "num_key_value_heads": 20, + "num_nextn_predict_layers": 1, + "pad_token_id": 154821, + "partial_rotary_factor": 1.0, + "pretraining_tp": 1, + "q_lora_rank": 768, + "qk_head_dim": 256, + "qk_nope_head_dim": 192, + "qk_rope_head_dim": 64, + "rms_norm_eps": 1e-05, + "rope_interleave": true, + "rope_parameters": { + "partial_rotary_factor": 1.0, + "rope_theta": 1000000, + "rope_type": "default" + }, + "routed_scaling_factor": 1.8, + "scoring_func": "sigmoid", + "tie_word_embeddings": false, + "topk_group": 1, + "topk_method": "noaux_tc", + "transformers_version": "5.0.0.dev0", + "unsloth_version": "2026.1.4", + "use_cache": false, + "v_head_dim": 256, + "vocab_size": 154880 +} diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c7c5b5605ae35b387380d39ee2323994ef5d219 --- /dev/null +++ b/model-00001-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debe575bb27c157958bd81a07a5d8c74e4564665afb7b5a3e159e332313f3a57 +size 1438134344 diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..463efdbc8da8ebfc871c7badb1cec2ae552f888d --- /dev/null +++ b/model-00002-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251c989c304853eb334ad4f7b26f9c9e81622ad4a08f3b006d489da11e1bf4db +size 1270648128 diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..258c5c4b0a179b388ff3f47b6e2693744a7d018e --- /dev/null +++ b/model-00003-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d23e72beab4c9b7cd635a6212bf08379a6dc02c7a0a99fa973781a6e87ee15 +size 1270648128 diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..303902aa3add97d6edf8d06ed5e20b0f8bfb1423 --- /dev/null +++ b/model-00004-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dcf21265dea0d608799ccb565d5f6f18b8a6778444f399284a8c0f9965cb307 +size 1270648128 diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad34d67203d46b9a67b87b60c3eaa2d0e26c6ab0 --- /dev/null +++ b/model-00005-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e1c43d48b91c6de73486b69912f5770a0bba884e197e69e917433e118cfd5d +size 1270648128 diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f16e92115b479b2273d50d00fe39287548419e74 --- /dev/null +++ b/model-00006-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16971aac2755cbaff01d609dd4825a5ae7822ef43f0e000ee67c57a3aadbb32 +size 1270648128 diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79a9baa3334aef960e6094f40ee679c625334a93 --- /dev/null +++ b/model-00007-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0cc5debe6959a570c2e39fb2a0b6ad91e7d060b930b7d213d31a75253fc98a +size 1270648128 diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08ebd9f7095a41783b55616b6bb057ff71781c42 --- /dev/null +++ b/model-00008-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8153deb9881ab467c649f244632af4bbf6976b3533976be143c8363f7830ce89 +size 1270648128 diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..730597a1df447a6d9a9368cfe6dce2971a75d91c --- /dev/null +++ b/model-00009-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56e2fc0631acdd7876acf6ce1d7a9f8d4673138ec5f47f12f03ad224729f3873 +size 1270648128 diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf65df197d76252126f313f50999cb85b6531c04 --- /dev/null +++ b/model-00010-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca58fd4a04bbcbed09168f091cb413608418cf5758b1d8fafa321822c3dde51d +size 1270648128 diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57a8b726e220cf55598e1d2a4d5212274943705e --- /dev/null +++ b/model-00011-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cccc9a1d6b640e16147d1f40ad3ad1f7c1dccab959892197d82023eaf873879f +size 1270648328 diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f7ea1dd306e34516026f7bcf967bde1ca8aeda3 --- /dev/null +++ b/model-00012-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6869b0487364a8dfb76f7f215c05c97a181e13b1aa0d0b3f054bbb17f87025 +size 1270648328 diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37ccca4dbe3a1e2946cdb358cffa9e6dce9fb2e9 --- /dev/null +++ b/model-00013-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5def49df83a7da1725da86264826a2c6e72af4c91aa67974833c8f4efa49840 +size 1270648328 diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..817f1f64c7fa0962b644810bfcabd77bb3969e36 --- /dev/null +++ b/model-00014-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323f67c9bbbce0a12cd5d92facc1a0904d9f94f9151bc4a8d2584cec5306a477 +size 1270648328 diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f10c13c7aa4f029489e010216095ecfd972c182 --- /dev/null +++ b/model-00015-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aa49f152d1020954639e48debed1b98f54f2309bc7bf0ce1a33a9055c57506a +size 1270648328 diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d151a973c2338066d97475c651c34e2240c8c3de --- /dev/null +++ b/model-00016-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a47b15a8eb90b722b1721a34e6aa8efab552d2921b426c9ff154fcfeba09a5 +size 1270648328 diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42a263642a8142ef920823328ab34494bf025a4e --- /dev/null +++ b/model-00017-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:988098bf339b73b7ba9d8e28ad47fa60f1002061cdc448a2f31b22b63dc53710 +size 1270648328 diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f11205522e821ee88b2a754407b9c03310377788 --- /dev/null +++ b/model-00018-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbaacfad2d7090face21580d30352e646b1b9b5498f91b4227e53266f57da12e +size 1270648328 diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08e772a23b721c7f813b2aea487248162d4d77ca --- /dev/null +++ b/model-00019-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf9a45fa80129690081a8bfc601c8daf26f803e0a79d85c9e3d969f04e28907 +size 1270648328 diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51c2bd6cee30fc4a9c49d9968296c8e4867a4c3d --- /dev/null +++ b/model-00020-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10e9ca78cf64aceab7ac67efdc548cca5762c28e8b9169e11b54758b406c7ca +size 1270648328 diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9ca4f7f96d9e46f01f0fea9dc8a7d1946972684 --- /dev/null +++ b/model-00021-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ce68b17b2b519c9620647807b65282e42e0d2a04a9df6bc343cb5881033b32 +size 1270648328 diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b8ef3028216f9e1ed5522eaaad45246d5c4bf98 --- /dev/null +++ b/model-00022-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fbb30d19ae0796ce6675d37d6c5d80feb81e36bcd71deb1a589b3c15971965e +size 1270648328 diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af9793e04aad307d5dfc8d6f7e77042c87c3683a --- /dev/null +++ b/model-00023-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8fa18d034ef3e28beb7f7cdd69487c4b4f5a844b20c93a753a4ef542ff3d02 +size 1270648328 diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b9a19657195c6fa4bbea26bf9c39fd26c6383fa --- /dev/null +++ b/model-00024-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6efa1685bccf348470f47b02248343dff523d74f12de13e66403b254be242c4 +size 1270648328 diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..917e17ced7da616815c8d65ee688b19b5217b3f9 --- /dev/null +++ b/model-00025-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200d5afad015c7bb6d8e3a1d2d912c0065b8fe9658d51475c069234e52dbd132 +size 1270648328 diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..670a868ccf21fcbd05167875e9e7f6449970690b --- /dev/null +++ b/model-00026-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc71fc9a8f1a12d64e8ccf22c4c1eddaba90b883a361637919ce30af89b509b +size 1270648328 diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3ed1ae6f2bac19e6e32078117d4add6f05afcc9 --- /dev/null +++ b/model-00027-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a6ca108869fa4a3f98a9e76c4942e5e6155ae4bb2a57edf1657dfdbc570dc4 +size 1270648328 diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0da963fce6cfbc04ba46a46fd4fe1c19ba7790db --- /dev/null +++ b/model-00028-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c5f3a352a23eecaf860d71484fc2579248767c3a90938eda943bd2e97deca5 +size 1270648328 diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bc8a405284396dc37dc423954ad16f3ba5e3300 --- /dev/null +++ b/model-00029-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20be66e2ab592d3aa87766e24a0bb9f3e3f2bd4714b696dc3bd60f88905acde +size 1270648328 diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24860edff5f50a8dffe8c39bf1f6d143003f4a2f --- /dev/null +++ b/model-00030-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43d388f94946b94a2f8a5d77edff03969ce8d3bd6c4fc437de8404151acde142 +size 1270648328 diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..005ec9aecb9a1c5ebf6958eb7ebe8341de631080 --- /dev/null +++ b/model-00031-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d485140a0d6ee6c1eb219b0d29b573d36c1651a873c8d05e3fdd87e592927f6 +size 1270648328 diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f8fc4eb87bb0a99428fcc016df960cfea26cc4d --- /dev/null +++ b/model-00032-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55ad6674e72e748f4adbaea25f4068db50ba8a10f0cce3c175ca9c7b72890c6 +size 1270648328 diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12b237e158732d1d3d403110fee97d4784ae87a5 --- /dev/null +++ b/model-00033-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ed83ccb13b244b88cf8ba08f5c655fc0a2919ab539c1347b7c9c7ff845fb25 +size 1270648328 diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2f06f8e44b99be92a279e4d5cef42107f34edfe --- /dev/null +++ b/model-00034-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa37f8f85c301a64009f08fad8c07d2de28b4102bcae7c4539fc2854da6278cd +size 1270648328 diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..490fa25fe51de27e341e3597c751847184930e18 --- /dev/null +++ b/model-00035-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee162abc4001a8995df68d49b46cb9092983625be519c65dd1db4c857d3da052 +size 1270648328 diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc8b0365508599a4ea6759780be1f3ea0e4637a8 --- /dev/null +++ b/model-00036-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9460574c236f521fa894ed11b5a5148da55502d95772cfac1b3f5303e550d49 +size 1270648328 diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..249d69f0c3365aab4cdf31e27c9e179706fe53a1 --- /dev/null +++ b/model-00037-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016c2c2418608050f9a05c4bbef0cbde3f3738665e95b63d85e2e61bfaf5bc1c +size 1270648328 diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c3e2059c5a358f7744bcc11746187fdebf4fc05 --- /dev/null +++ b/model-00038-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef0c5a2fecc59b5bbc8ce01116a79a8d65d9629103a8673a3afa015afdcabc2 +size 1270648328 diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..051b3e90de7dee753d9a4ef538d6abe180bc02ff --- /dev/null +++ b/model-00039-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14ed0efbd2d034b0ea0749cc5afdd30b99ea9464a78c7c9c53c67b988e16ad23 +size 1270648328 diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e18cd305e414c753da41cce2da3ee77b69dce09 --- /dev/null +++ b/model-00040-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865b9050ca1c850b645f5d0f73c782447884f2adedaf0fea41169ceb3a22f29b +size 1270648328 diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26013e3160ea9a1d459b73b849be0e2bb7c2616d --- /dev/null +++ b/model-00041-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed6dff717442ff786f4605530924cb9273339545b3c900da55ca0c7829dc6d7a +size 1270648328 diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d710e41c9a7706ed70e7997d6b0f6f5c8b964bf6 --- /dev/null +++ b/model-00042-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6958eab6314e2ca4d6e737f16f368cf3756692d17b0028dc0ff68ce6a5666c +size 1270648328 diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8dadab97cca68a5af48804752d3ffc1c512ca959 --- /dev/null +++ b/model-00043-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f66c0ec1a59c4c4862fff94b6dd813eab44dcd3f2f7ce6c84b1e061d71f0f010 +size 1270648328 diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80c6a1fa237b3a65c1e763313dd3905f9fef7959 --- /dev/null +++ b/model-00044-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a089c84bedea15069966751d62b4fb8476a45c0b17c38d0cc567756448b122aa +size 1270648328 diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0242c1d1f02f2d254eda76ba3758d22bd0196171 --- /dev/null +++ b/model-00045-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65401dedb63c5fccb1eac77f4476464bd7b775b257194a0d691f06ad0531435f +size 1270648328 diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90de0aa8fc523b4c0d5902b950c6d11f108b73f7 --- /dev/null +++ b/model-00046-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f0652140c8adaea563f96d36fb0bd791ae4780d66e444d01e8db6fca634880 +size 1270648328 diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abf39bc9044586dc9d27344c5c9bceec080d82d2 --- /dev/null +++ b/model-00047-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0d7b2fd7fd5e5a00fe232e7d40e008e41bb2c6363a1832a44a33b12f36b88b +size 2539429936 diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66dc5fc40d32acc196b9686568f96f35dc95a747 --- /dev/null +++ b/model-00048-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fff90a30ca808d86dc24f9e3eda119832ab69fb1f88ae4cccfbf0e5ee409a1 +size 1287438264 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..215064e6cfa2a56b41b5ef618d82b802a103cdfc --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,9710 @@ +{ + "metadata": { + "total_size": 31221488576 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00048.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.kv_a_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.kv_a_proj_with_mqa.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.kv_b_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_a_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_a_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_b_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.47.embed_tokens.weight": "model-00001-of-00048.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.0.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.0.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.0.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.1.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.1.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.1.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.10.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.10.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.10.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.11.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.11.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.11.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.12.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.12.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.12.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.13.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.13.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.13.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.14.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.14.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.14.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.15.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.15.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.15.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.16.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.16.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.16.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.17.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.17.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.17.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.18.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.18.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.18.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.19.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.19.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.19.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.2.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.2.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.2.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.20.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.20.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.20.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.21.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.21.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.21.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.22.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.22.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.22.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.23.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.23.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.23.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.24.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.24.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.24.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.25.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.25.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.25.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.26.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.26.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.26.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.27.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.27.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.27.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.28.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.28.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.28.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.29.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.29.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.29.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.3.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.3.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.3.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.30.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.30.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.30.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.31.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.31.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.31.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.32.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.32.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.32.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.33.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.33.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.33.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.34.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.34.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.34.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.35.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.35.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.35.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.36.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.36.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.36.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.37.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.37.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.37.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.38.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.38.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.38.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.39.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.39.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.39.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.4.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.4.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.4.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.40.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.40.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.40.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.41.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.41.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.41.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.42.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.42.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.42.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.43.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.43.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.43.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.44.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.44.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.44.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.45.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.45.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.45.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.46.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.46.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.46.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.47.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.47.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.47.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.48.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.48.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.48.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.49.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.49.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.49.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.5.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.5.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.5.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.50.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.50.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.50.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.51.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.51.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.51.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.52.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.52.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.52.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.53.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.53.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.53.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.54.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.54.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.54.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.55.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.55.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.55.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.56.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.56.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.56.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.57.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.57.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.57.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.58.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.58.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.58.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.59.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.59.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.59.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.6.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.6.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.6.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.60.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.60.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.60.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.61.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.61.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.61.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.62.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.62.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.62.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.63.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.63.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.63.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.7.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.7.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.7.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.8.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.8.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.8.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.9.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.9.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.9.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.gate.e_score_correction_bias": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.gate.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.kv_a_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.kv_a_proj_with_mqa.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.kv_b_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_a_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_a_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_b_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.0.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.0.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.0.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.1.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.1.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.1.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.10.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.10.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.10.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.11.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.11.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.11.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.12.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.12.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.12.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.13.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.13.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.13.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.14.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.14.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.14.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.15.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.15.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.15.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.16.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.16.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.16.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.17.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.17.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.17.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.18.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.18.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.18.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.19.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.19.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.19.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.2.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.2.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.2.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.20.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.20.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.20.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.21.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.21.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.21.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.22.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.22.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.22.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.23.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.23.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.23.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.24.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.24.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.24.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.25.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.25.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.25.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.26.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.26.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.26.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.27.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.27.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.27.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.28.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.28.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.28.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.29.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.29.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.29.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.3.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.3.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.3.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.30.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.30.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.30.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.31.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.31.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.31.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.32.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.32.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.32.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.33.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.33.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.33.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.34.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.34.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.34.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.35.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.35.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.35.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.36.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.36.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.36.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.37.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.37.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.37.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.38.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.38.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.38.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.39.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.39.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.39.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.4.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.4.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.4.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.40.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.40.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.40.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.41.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.41.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.41.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.42.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.42.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.42.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.43.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.43.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.43.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.44.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.44.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.44.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.45.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.45.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.45.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.46.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.46.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.46.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.47.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.47.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.47.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.48.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.48.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.48.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.49.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.49.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.49.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.5.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.5.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.5.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.50.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.50.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.50.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.51.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.51.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.51.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.52.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.52.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.52.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.53.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.53.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.53.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.54.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.54.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.54.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.55.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.55.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.55.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.56.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.56.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.56.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.57.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.57.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.57.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.58.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.58.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.58.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.59.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.59.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.59.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.6.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.6.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.6.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.60.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.60.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.60.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.61.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.61.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.61.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.62.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.62.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.62.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.63.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.63.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.63.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.7.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.7.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.7.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.8.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.8.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.8.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.9.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.9.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.9.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.gate.e_score_correction_bias": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.gate.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.kv_a_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.kv_a_proj_with_mqa.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.kv_b_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.q_a_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.q_a_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.q_b_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.0.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.0.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.1.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.1.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.10.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.10.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.11.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.11.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.12.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.12.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.13.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.13.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.14.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.14.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.15.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.15.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.16.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.16.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.17.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.17.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.18.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.18.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.19.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.19.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.2.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.2.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.20.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.20.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.21.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.21.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.22.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.22.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.23.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.23.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.24.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.24.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.25.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.25.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.26.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.26.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.26.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.27.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.27.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.28.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.28.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.29.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.29.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.3.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.3.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.30.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.30.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.31.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.31.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.32.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.32.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.33.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.33.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.34.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.34.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.35.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.35.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.36.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.36.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.37.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.37.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.38.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.38.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.39.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.39.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.4.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.4.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.40.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.40.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.41.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.41.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.41.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.42.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.42.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.43.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.43.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.44.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.44.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.45.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.45.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.46.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.46.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.47.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.47.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.48.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.48.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.49.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.49.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.5.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.5.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.50.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.50.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.51.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.51.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.52.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.52.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.53.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.53.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.54.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.54.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.55.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.55.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.56.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.56.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.56.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.57.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.57.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.58.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.58.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.59.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.59.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.6.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.6.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.60.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.60.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.61.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.61.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.62.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.62.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.63.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.63.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.7.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.7.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.8.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.8.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.9.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.9.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.gate.e_score_correction_bias": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.gate.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.kv_a_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.kv_a_proj_with_mqa.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.kv_b_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.q_a_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.q_a_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.q_b_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.14.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.29.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.44.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.59.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.7.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.gate.e_score_correction_bias": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.gate.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.kv_a_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.kv_a_proj_with_mqa.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.kv_b_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.q_a_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.q_a_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.q_b_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.11.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.19.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.26.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.34.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.4.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.4.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.41.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.49.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.56.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.gate.e_score_correction_bias": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.gate.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.kv_a_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.kv_a_proj_with_mqa.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.kv_b_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.q_a_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.q_a_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.q_b_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.1.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.19.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.23.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.34.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.38.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.4.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.49.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.53.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.8.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.gate.e_score_correction_bias": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.gate.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.kv_a_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.kv_a_proj_with_mqa.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.kv_b_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.q_a_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.q_a_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.q_b_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.input_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.13.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.28.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.43.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.58.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.gate.e_score_correction_bias": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.gate.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.kv_a_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.kv_a_proj_with_mqa.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.kv_b_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.q_a_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.q_a_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.q_b_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.input_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.10.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.20.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.25.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.35.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.40.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.5.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.50.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.55.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.gate.e_score_correction_bias": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.gate.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.kv_a_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.kv_a_proj_with_mqa.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.kv_b_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.q_a_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.q_a_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.q_b_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.input_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.20.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.35.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.5.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.50.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.gate.e_score_correction_bias": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.gate.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.kv_a_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.kv_a_proj_with_mqa.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.kv_b_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.q_a_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.q_a_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.q_b_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.input_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.13.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.18.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.20.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.22.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.29.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.3.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.33.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.35.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.42.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.44.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.48.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.51.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.57.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.63.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.gate.e_score_correction_bias": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.gate.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.kv_a_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.kv_a_proj_with_mqa.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.kv_b_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.q_a_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.q_a_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.q_b_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.input_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.0.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.15.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.30.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.45.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.60.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.gate.e_score_correction_bias": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.gate.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.kv_a_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.kv_a_proj_with_mqa.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.kv_b_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.q_a_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.q_a_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.q_b_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.input_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.0.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.0.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.1.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.1.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.10.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.10.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.11.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.11.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.12.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.12.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.13.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.13.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.14.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.14.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.15.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.15.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.16.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.16.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.17.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.17.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.18.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.18.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.19.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.19.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.2.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.2.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.20.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.20.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.21.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.21.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.22.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.22.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.23.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.23.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.24.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.24.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.25.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.25.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.26.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.26.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.27.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.27.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.28.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.28.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.29.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.29.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.3.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.3.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.30.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.30.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.31.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.31.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.32.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.32.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.33.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.33.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.34.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.34.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.35.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.35.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.36.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.36.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.37.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.37.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.38.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.38.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.39.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.39.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.4.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.4.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.40.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.40.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.41.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.41.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.42.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.42.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.43.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.43.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.44.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.44.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.45.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.45.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.46.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.46.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.47.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.47.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.48.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.48.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.49.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.49.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.5.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.5.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.50.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.50.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.51.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.51.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.52.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.52.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.53.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.53.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.54.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.54.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.55.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.55.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.56.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.56.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.57.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.57.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.58.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.58.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.59.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.59.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.6.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.6.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.60.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.60.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.61.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.61.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.62.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.62.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.63.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.63.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.7.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.7.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.8.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.8.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.9.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.9.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.gate.e_score_correction_bias": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.gate.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.kv_a_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.kv_a_proj_with_mqa.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.kv_b_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.q_a_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.q_a_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.q_b_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.input_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.0.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.0.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.1.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.1.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.10.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.10.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.11.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.11.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.12.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.12.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.13.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.13.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.14.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.14.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.15.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.15.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.16.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.16.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.17.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.17.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.18.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.18.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.19.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.19.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.2.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.2.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.20.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.20.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.21.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.21.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.22.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.22.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.23.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.23.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.24.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.24.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.25.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.25.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.26.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.26.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.27.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.27.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.28.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.28.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.29.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.29.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.3.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.3.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.30.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.30.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.31.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.31.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.32.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.32.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.33.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.33.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.34.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.34.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.35.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.35.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.36.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.36.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.37.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.37.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.38.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.38.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.39.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.39.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.4.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.4.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.40.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.40.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.41.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.41.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.42.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.42.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.43.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.43.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.44.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.44.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.45.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.45.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.46.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.46.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.47.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.47.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.48.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.48.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.49.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.49.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.5.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.5.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.50.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.50.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.51.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.51.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.52.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.52.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.53.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.53.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.54.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.54.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.55.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.55.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.56.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.56.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.57.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.57.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.58.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.58.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.59.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.59.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.6.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.6.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.60.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.60.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.61.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.61.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.62.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.62.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.63.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.63.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.7.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.7.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.8.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.8.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.9.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.9.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.gate.e_score_correction_bias": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.gate.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.kv_a_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.kv_a_proj_with_mqa.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.kv_b_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.q_a_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.q_a_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.q_b_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.input_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.0.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.0.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.1.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.1.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.10.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.10.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.11.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.11.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.12.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.12.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.13.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.13.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.14.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.14.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.15.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.15.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.16.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.16.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.17.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.17.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.18.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.18.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.19.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.19.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.2.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.2.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.20.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.20.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.21.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.21.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.22.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.22.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.23.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.23.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.24.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.24.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.25.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.25.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.26.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.26.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.27.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.27.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.28.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.28.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.29.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.29.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.3.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.3.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.30.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.30.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.31.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.31.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.32.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.32.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.33.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.33.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.34.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.34.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.35.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.35.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.36.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.36.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.37.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.37.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.38.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.38.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.39.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.39.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.4.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.4.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.40.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.40.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.41.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.41.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.42.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.42.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.43.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.43.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.44.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.44.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.45.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.45.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.46.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.46.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.47.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.47.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.48.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.48.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.49.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.49.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.5.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.5.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.50.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.50.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.51.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.51.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.52.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.52.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.53.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.53.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.54.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.54.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.55.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.55.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.56.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.56.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.57.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.57.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.58.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.58.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.59.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.59.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.6.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.6.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.60.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.60.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.61.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.61.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.62.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.62.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.63.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.63.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.7.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.7.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.8.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.8.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.9.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.9.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.gate.e_score_correction_bias": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.gate.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.kv_a_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.kv_a_proj_with_mqa.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.kv_b_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.q_a_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.q_a_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.q_b_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.input_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.0.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.0.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.1.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.1.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.10.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.10.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.11.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.11.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.12.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.12.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.13.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.13.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.14.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.14.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.15.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.15.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.16.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.16.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.17.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.17.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.18.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.18.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.19.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.19.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.2.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.2.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.20.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.20.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.21.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.21.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.22.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.22.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.23.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.23.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.24.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.24.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.25.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.25.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.26.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.26.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.27.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.27.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.28.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.28.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.29.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.29.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.3.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.3.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.30.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.30.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.31.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.31.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.32.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.32.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.33.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.33.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.34.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.34.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.35.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.35.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.36.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.36.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.37.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.37.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.38.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.38.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.39.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.39.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.4.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.4.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.40.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.40.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.41.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.41.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.42.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.42.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.43.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.43.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.44.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.44.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.45.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.45.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.46.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.46.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.47.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.47.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.48.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.48.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.49.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.49.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.5.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.5.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.50.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.50.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.51.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.51.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.52.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.52.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.53.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.53.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.54.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.54.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.55.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.55.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.56.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.56.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.57.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.57.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.58.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.58.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.59.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.59.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.6.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.6.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.60.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.60.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.61.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.61.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.62.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.62.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.63.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.63.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.7.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.7.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.8.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.8.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.9.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.9.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.gate.e_score_correction_bias": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.gate.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.kv_a_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.kv_a_proj_with_mqa.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.kv_b_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.q_a_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.q_a_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.q_b_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.input_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.0.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.0.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.1.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.1.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.10.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.10.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.11.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.11.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.12.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.12.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.13.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.13.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.14.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.14.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.15.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.15.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.16.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.16.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.17.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.17.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.18.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.18.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.19.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.19.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.2.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.2.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.20.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.20.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.21.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.21.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.22.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.22.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.23.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.23.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.24.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.24.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.25.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.25.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.26.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.26.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.27.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.27.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.28.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.28.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.29.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.29.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.3.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.3.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.30.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.30.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.31.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.31.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.32.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.32.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.33.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.33.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.34.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.34.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.35.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.35.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.36.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.36.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.37.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.37.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.38.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.38.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.39.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.39.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.4.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.4.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.40.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.40.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.41.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.41.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.42.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.42.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.43.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.43.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.44.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.44.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.45.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.45.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.46.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.46.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.47.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.47.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.48.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.48.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.49.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.49.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.5.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.5.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.50.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.50.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.51.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.51.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.52.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.52.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.53.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.53.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.54.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.54.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.55.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.55.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.56.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.56.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.57.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.57.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.58.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.58.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.59.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.59.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.6.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.6.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.60.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.60.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.61.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.61.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.62.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.62.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.63.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.63.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.7.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.7.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.8.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.8.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.9.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.9.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.gate.e_score_correction_bias": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.gate.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.kv_a_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.kv_a_proj_with_mqa.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.kv_b_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.q_a_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.q_a_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.q_b_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.input_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.0.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.0.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.1.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.1.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.10.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.10.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.11.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.11.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.12.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.12.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.13.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.13.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.14.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.14.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.15.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.15.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.16.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.16.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.17.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.17.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.18.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.18.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.19.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.19.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.2.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.2.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.20.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.20.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.21.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.21.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.22.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.22.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.23.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.23.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.24.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.24.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.25.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.25.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.26.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.26.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.27.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.27.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.28.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.28.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.29.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.29.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.3.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.3.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.30.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.30.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.31.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.31.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.32.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.32.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.33.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.33.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.34.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.34.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.35.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.35.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.36.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.36.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.37.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.37.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.38.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.38.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.39.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.39.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.4.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.4.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.40.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.40.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.41.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.41.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.42.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.42.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.43.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.43.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.44.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.44.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.45.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.45.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.46.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.46.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.47.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.47.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.48.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.48.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.49.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.49.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.5.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.5.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.50.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.50.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.51.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.51.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.52.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.52.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.53.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.53.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.54.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.54.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.55.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.55.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.56.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.56.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.57.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.57.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.58.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.58.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.59.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.59.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.6.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.6.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.60.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.60.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.61.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.61.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.62.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.62.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.63.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.63.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.7.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.7.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.8.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.8.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.9.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.9.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.gate.e_score_correction_bias": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.gate.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.kv_a_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.kv_a_proj_with_mqa.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.kv_b_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.q_a_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.q_a_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.q_b_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.input_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.0.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.0.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.1.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.1.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.10.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.10.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.11.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.11.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.12.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.12.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.13.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.13.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.14.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.14.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.15.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.15.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.16.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.16.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.17.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.17.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.18.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.18.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.19.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.19.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.2.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.2.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.20.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.20.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.21.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.21.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.22.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.22.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.23.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.23.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.24.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.24.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.25.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.25.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.26.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.26.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.27.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.27.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.28.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.28.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.29.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.29.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.3.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.3.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.30.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.30.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.31.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.31.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.32.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.32.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.33.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.33.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.34.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.34.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.35.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.35.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.36.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.36.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.37.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.37.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.38.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.38.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.39.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.39.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.4.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.4.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.40.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.40.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.41.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.41.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.42.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.42.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.43.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.43.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.44.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.44.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.45.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.45.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.46.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.46.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.47.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.47.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.48.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.48.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.49.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.49.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.5.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.5.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.50.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.50.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.51.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.51.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.52.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.52.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.53.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.53.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.54.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.54.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.55.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.55.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.56.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.56.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.57.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.57.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.58.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.58.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.59.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.59.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.6.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.6.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.60.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.60.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.61.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.61.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.62.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.62.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.63.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.63.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.7.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.7.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.8.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.8.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.9.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.9.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.gate.e_score_correction_bias": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.gate.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.kv_a_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.kv_a_proj_with_mqa.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.kv_b_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.q_a_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.q_a_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.q_b_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.input_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.0.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.0.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.1.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.1.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.10.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.10.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.11.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.11.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.12.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.12.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.13.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.13.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.14.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.14.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.15.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.15.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.16.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.16.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.17.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.17.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.18.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.18.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.19.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.19.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.2.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.2.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.20.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.20.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.21.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.21.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.22.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.22.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.23.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.23.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.24.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.24.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.25.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.25.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.26.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.26.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.27.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.27.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.28.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.28.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.29.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.29.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.3.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.3.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.30.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.30.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.31.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.31.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.32.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.32.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.33.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.33.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.34.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.34.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.35.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.35.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.36.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.36.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.37.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.37.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.38.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.38.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.39.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.39.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.4.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.4.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.40.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.40.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.41.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.41.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.42.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.42.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.43.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.43.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.44.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.44.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.45.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.45.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.46.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.46.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.47.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.47.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.48.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.48.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.49.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.49.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.5.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.5.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.50.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.50.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.51.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.51.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.52.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.52.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.53.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.53.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.54.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.54.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.55.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.55.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.56.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.56.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.57.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.57.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.58.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.58.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.59.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.59.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.6.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.6.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.60.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.60.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.61.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.61.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.62.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.62.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.63.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.63.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.7.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.7.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.8.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.8.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.9.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.9.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.gate.e_score_correction_bias": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.gate.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.kv_a_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.kv_a_proj_with_mqa.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.kv_b_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.q_a_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.q_a_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.q_b_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.input_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.0.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.0.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.1.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.1.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.10.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.10.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.11.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.11.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.12.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.12.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.13.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.13.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.14.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.14.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.15.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.15.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.16.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.16.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.17.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.17.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.18.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.18.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.19.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.19.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.2.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.2.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.20.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.20.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.21.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.21.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.22.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.22.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.23.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.23.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.24.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.24.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.25.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.25.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.26.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.26.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.27.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.27.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.28.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.28.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.29.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.29.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.3.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.3.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.30.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.30.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.31.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.31.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.32.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.32.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.33.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.33.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.34.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.34.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.35.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.35.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.36.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.36.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.37.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.37.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.38.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.38.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.39.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.39.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.4.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.4.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.40.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.40.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.41.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.41.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.42.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.42.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.43.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.43.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.44.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.44.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.45.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.45.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.46.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.46.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.47.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.47.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.48.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.48.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.49.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.49.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.5.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.5.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.50.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.50.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.51.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.51.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.52.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.52.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.53.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.53.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.54.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.54.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.55.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.55.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.56.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.56.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.57.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.57.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.58.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.58.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.59.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.59.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.6.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.6.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.60.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.60.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.61.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.61.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.62.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.62.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.63.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.63.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.7.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.7.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.8.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.8.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.9.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.9.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.gate.e_score_correction_bias": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.gate.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.kv_a_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.kv_a_proj_with_mqa.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.kv_b_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.q_a_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.q_a_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.q_b_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.input_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.0.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.0.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.1.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.1.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.10.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.10.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.11.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.11.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.12.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.12.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.13.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.13.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.14.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.14.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.15.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.15.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.16.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.16.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.17.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.17.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.18.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.18.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.19.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.19.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.2.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.2.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.20.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.20.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.21.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.21.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.22.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.22.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.23.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.23.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.24.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.24.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.25.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.25.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.26.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.26.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.27.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.27.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.28.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.28.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.29.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.29.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.3.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.3.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.30.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.30.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.31.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.31.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.32.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.32.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.33.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.33.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.34.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.34.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.35.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.35.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.36.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.36.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.37.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.37.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.38.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.38.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.39.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.39.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.4.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.4.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.40.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.40.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.41.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.41.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.42.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.42.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.43.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.43.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.44.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.44.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.45.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.45.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.46.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.46.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.47.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.47.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.48.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.48.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.49.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.49.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.5.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.5.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.50.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.50.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.51.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.51.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.52.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.52.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.53.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.53.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.54.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.54.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.55.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.55.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.56.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.56.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.57.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.57.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.58.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.58.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.59.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.59.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.6.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.6.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.60.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.60.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.61.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.61.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.62.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.62.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.63.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.63.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.7.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.7.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.8.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.8.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.9.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.9.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.gate.e_score_correction_bias": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.gate.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.kv_a_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.kv_a_proj_with_mqa.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.kv_b_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.q_a_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.q_a_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.q_b_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.input_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.0.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.0.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.1.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.1.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.10.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.10.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.11.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.11.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.12.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.12.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.13.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.13.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.14.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.14.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.15.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.15.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.16.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.16.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.17.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.17.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.18.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.18.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.19.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.19.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.2.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.2.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.20.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.20.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.21.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.21.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.22.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.22.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.23.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.23.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.24.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.24.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.25.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.25.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.26.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.26.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.27.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.27.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.28.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.28.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.29.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.29.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.3.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.3.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.30.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.30.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.31.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.31.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.32.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.32.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.33.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.33.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.34.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.34.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.35.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.35.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.36.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.36.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.37.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.37.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.38.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.38.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.39.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.39.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.4.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.4.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.40.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.40.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.41.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.41.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.42.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.42.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.43.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.43.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.44.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.44.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.45.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.45.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.46.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.46.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.47.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.47.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.48.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.48.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.49.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.49.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.5.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.5.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.50.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.50.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.51.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.51.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.52.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.52.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.53.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.53.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.54.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.54.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.55.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.55.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.56.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.56.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.57.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.57.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.58.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.58.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.59.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.59.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.6.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.6.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.60.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.60.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.61.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.61.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.62.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.62.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.63.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.63.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.7.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.7.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.8.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.8.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.9.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.9.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.gate.e_score_correction_bias": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.gate.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.kv_a_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.kv_a_proj_with_mqa.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.kv_b_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.q_a_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.q_a_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.q_b_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.input_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.0.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.0.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.1.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.1.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.10.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.10.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.11.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.11.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.12.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.12.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.13.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.13.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.14.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.14.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.15.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.15.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.16.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.16.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.17.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.17.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.18.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.18.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.19.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.19.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.2.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.2.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.20.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.20.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.21.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.21.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.22.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.22.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.23.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.23.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.24.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.24.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.25.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.25.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.26.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.26.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.27.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.27.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.28.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.28.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.29.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.29.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.3.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.3.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.30.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.30.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.31.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.31.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.32.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.32.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.33.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.33.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.34.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.34.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.35.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.35.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.36.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.36.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.37.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.37.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.38.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.38.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.39.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.39.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.4.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.4.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.40.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.40.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.41.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.41.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.42.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.42.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.43.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.43.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.44.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.44.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.45.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.45.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.46.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.46.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.47.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.47.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.48.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.48.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.49.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.49.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.5.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.5.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.50.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.50.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.51.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.51.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.52.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.52.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.53.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.53.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.54.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.54.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.55.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.55.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.56.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.56.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.57.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.57.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.58.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.58.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.59.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.59.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.6.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.6.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.60.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.60.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.61.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.61.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.62.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.62.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.63.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.63.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.7.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.7.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.8.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.8.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.9.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.9.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.gate.e_score_correction_bias": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.gate.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.kv_a_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.kv_a_proj_with_mqa.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.kv_b_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.q_a_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.q_a_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.q_b_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.input_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.0.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.0.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.1.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.1.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.10.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.10.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.11.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.11.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.12.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.12.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.13.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.13.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.14.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.14.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.15.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.15.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.16.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.16.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.17.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.17.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.18.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.18.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.19.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.19.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.2.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.2.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.20.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.20.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.21.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.21.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.22.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.22.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.23.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.23.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.24.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.24.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.25.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.25.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.26.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.26.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.27.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.27.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.28.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.28.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.29.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.29.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.3.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.3.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.30.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.30.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.31.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.31.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.32.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.32.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.33.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.33.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.34.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.34.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.35.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.35.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.36.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.36.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.37.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.37.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.38.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.38.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.39.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.39.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.4.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.4.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.40.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.40.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.41.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.41.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.42.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.42.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.43.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.43.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.44.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.44.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.45.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.45.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.46.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.46.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.47.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.47.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.48.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.48.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.49.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.49.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.5.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.5.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.50.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.50.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.51.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.51.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.52.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.52.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.53.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.53.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.54.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.54.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.55.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.55.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.56.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.56.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.57.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.57.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.58.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.58.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.59.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.59.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.6.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.6.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.60.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.60.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.61.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.61.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.62.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.62.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.63.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.63.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.7.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.7.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.8.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.8.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.9.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.9.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.gate.e_score_correction_bias": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.gate.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.kv_a_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.kv_a_proj_with_mqa.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.kv_b_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.q_a_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.q_a_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.q_b_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.input_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.0.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.0.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.1.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.1.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.10.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.10.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.11.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.11.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.12.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.12.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.13.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.13.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.14.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.14.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.15.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.15.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.16.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.16.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.17.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.17.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.18.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.18.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.19.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.19.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.2.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.2.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.20.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.20.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.21.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.21.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.22.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.22.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.23.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.23.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.24.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.24.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.25.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.25.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.26.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.26.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.27.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.27.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.28.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.28.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.29.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.29.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.3.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.3.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.30.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.30.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.31.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.31.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.32.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.32.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.33.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.33.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.34.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.34.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.35.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.35.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.36.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.36.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.37.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.37.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.38.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.38.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.39.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.39.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.4.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.4.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.40.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.40.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.41.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.41.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.42.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.42.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.43.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.43.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.44.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.44.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.45.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.45.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.46.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.46.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.47.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.47.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.48.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.48.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.49.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.49.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.5.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.5.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.50.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.50.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.51.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.51.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.52.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.52.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.53.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.53.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.54.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.54.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.55.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.55.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.56.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.56.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.57.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.57.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.58.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.58.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.59.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.59.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.6.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.6.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.60.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.60.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.61.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.61.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.62.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.62.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.63.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.63.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.7.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.7.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.8.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.8.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.9.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.9.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.gate.e_score_correction_bias": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.gate.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.kv_a_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.kv_a_proj_with_mqa.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.kv_b_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.q_a_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.q_a_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.q_b_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.input_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.0.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.0.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.1.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.1.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.10.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.10.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.11.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.11.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.12.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.12.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.13.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.13.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.14.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.14.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.15.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.15.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.16.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.16.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.17.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.17.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.18.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.18.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.19.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.19.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.2.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.2.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.20.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.20.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.21.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.21.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.22.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.22.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.23.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.23.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.24.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.24.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.25.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.25.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.26.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.26.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.27.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.27.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.28.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.28.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.29.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.29.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.3.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.3.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.30.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.30.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.31.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.31.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.32.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.32.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.33.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.33.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.34.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.34.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.35.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.35.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.36.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.36.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.37.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.37.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.38.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.38.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.39.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.39.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.4.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.4.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.40.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.40.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.41.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.41.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.42.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.42.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.43.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.43.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.44.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.44.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.45.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.45.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.46.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.46.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.47.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.47.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.48.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.48.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.49.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.49.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.5.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.5.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.50.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.50.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.51.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.51.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.52.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.52.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.53.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.53.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.54.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.54.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.55.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.55.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.56.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.56.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.57.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.57.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.58.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.58.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.59.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.59.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.6.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.6.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.60.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.60.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.61.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.61.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.62.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.62.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.63.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.63.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.7.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.7.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.8.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.8.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.9.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.9.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.gate.e_score_correction_bias": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.gate.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.kv_a_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.kv_a_proj_with_mqa.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.kv_b_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.q_a_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.q_a_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.q_b_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.input_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.0.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.0.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.1.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.1.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.10.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.10.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.11.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.11.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.12.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.12.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.13.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.13.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.14.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.14.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.15.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.15.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.16.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.16.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.17.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.17.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.18.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.18.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.19.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.19.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.2.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.2.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.20.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.20.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.21.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.21.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.22.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.22.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.23.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.23.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.24.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.24.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.25.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.25.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.26.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.26.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.27.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.27.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.28.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.28.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.29.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.29.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.3.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.3.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.30.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.30.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.31.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.31.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.32.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.32.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.33.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.33.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.34.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.34.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.35.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.35.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.36.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.36.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.37.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.37.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.38.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.38.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.39.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.39.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.4.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.4.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.40.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.40.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.41.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.41.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.42.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.42.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.43.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.43.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.44.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.44.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.45.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.45.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.46.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.46.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.47.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.47.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.48.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.48.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.49.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.49.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.5.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.5.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.50.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.50.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.51.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.51.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.52.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.52.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.53.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.53.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.54.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.54.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.55.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.55.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.56.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.56.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.57.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.57.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.58.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.58.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.59.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.59.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.6.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.6.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.60.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.60.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.61.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.61.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.62.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.62.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.63.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.63.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.7.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.7.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.8.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.8.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.9.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.9.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.gate.e_score_correction_bias": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.gate.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.kv_a_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.kv_a_proj_with_mqa.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.kv_b_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.q_a_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.q_a_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.q_b_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.input_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.0.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.0.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.1.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.1.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.10.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.10.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.11.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.11.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.12.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.12.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.13.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.13.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.14.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.14.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.15.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.15.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.16.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.16.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.16.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.17.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.17.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.17.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.18.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.18.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.18.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.19.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.19.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.19.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.2.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.2.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.20.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.20.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.20.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.21.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.21.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.21.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.22.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.22.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.22.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.23.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.23.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.23.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.24.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.24.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.24.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.25.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.25.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.25.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.26.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.26.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.26.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.27.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.27.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.27.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.28.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.28.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.28.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.29.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.29.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.29.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.3.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.3.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.3.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.30.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.30.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.30.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.31.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.31.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.31.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.32.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.32.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.32.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.33.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.33.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.33.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.34.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.34.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.34.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.35.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.35.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.35.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.36.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.36.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.36.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.37.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.37.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.37.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.38.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.38.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.38.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.39.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.39.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.39.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.4.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.4.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.40.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.40.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.40.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.41.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.41.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.41.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.42.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.42.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.42.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.43.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.43.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.43.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.44.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.44.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.44.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.45.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.45.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.45.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.46.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.46.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.46.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.47.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.47.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.47.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.48.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.48.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.48.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.49.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.49.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.49.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.5.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.5.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.50.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.50.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.50.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.51.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.51.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.51.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.52.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.52.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.52.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.53.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.53.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.53.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.54.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.54.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.54.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.55.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.55.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.55.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.56.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.56.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.56.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.57.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.57.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.57.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.58.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.58.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.58.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.59.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.59.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.59.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.6.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.6.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.60.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.60.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.60.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.61.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.61.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.61.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.62.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.62.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.62.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.63.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.63.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.63.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.7.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.7.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.8.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.8.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.9.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.9.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.gate.e_score_correction_bias": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.gate.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.kv_a_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.kv_a_proj_with_mqa.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.kv_b_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.q_a_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.q_a_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.q_b_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.input_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.0.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.0.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.1.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.1.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.10.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.10.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.11.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.11.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.12.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.12.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.13.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.13.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.14.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.14.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.15.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.15.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.16.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.16.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.16.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.17.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.17.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.17.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.18.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.18.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.18.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.19.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.19.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.19.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.2.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.2.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.20.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.20.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.20.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.21.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.21.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.21.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.22.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.22.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.22.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.23.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.23.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.23.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.24.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.24.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.24.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.25.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.25.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.25.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.26.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.26.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.26.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.27.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.27.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.27.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.28.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.28.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.28.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.29.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.29.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.29.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.3.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.3.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.30.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.30.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.30.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.31.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.31.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.31.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.32.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.32.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.32.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.33.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.33.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.33.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.34.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.34.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.34.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.35.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.35.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.35.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.36.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.36.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.36.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.37.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.37.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.37.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.38.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.38.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.38.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.39.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.39.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.39.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.4.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.4.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.40.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.40.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.40.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.41.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.41.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.41.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.42.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.42.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.42.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.43.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.43.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.43.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.44.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.44.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.44.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.45.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.45.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.45.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.46.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.46.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.46.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.47.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.47.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.47.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.48.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.48.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.48.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.49.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.49.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.49.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.5.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.5.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.50.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.50.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.50.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.51.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.51.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.51.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.52.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.52.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.52.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.53.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.53.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.53.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.54.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.54.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.54.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.55.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.55.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.55.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.56.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.56.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.56.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.57.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.57.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.57.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.58.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.58.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.58.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.59.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.59.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.59.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.6.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.6.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.60.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.60.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.60.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.61.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.61.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.61.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.62.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.62.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.62.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.63.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.63.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.63.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.7.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.7.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.7.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.8.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.8.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.9.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.9.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.gate.e_score_correction_bias": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.gate.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.kv_a_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.kv_a_proj_with_mqa.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.kv_b_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.q_a_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.q_a_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.q_b_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.input_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.0.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.0.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.1.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.1.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.10.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.10.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.11.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.11.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.12.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.12.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.13.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.13.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.14.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.14.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.15.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.15.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.16.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.16.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.16.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.17.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.17.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.17.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.18.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.18.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.18.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.19.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.19.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.19.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.2.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.2.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.20.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.20.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.20.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.21.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.21.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.21.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.22.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.22.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.22.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.23.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.23.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.23.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.24.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.24.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.24.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.25.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.25.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.25.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.26.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.26.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.26.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.27.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.27.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.27.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.28.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.28.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.28.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.29.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.29.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.29.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.3.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.3.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.30.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.30.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.30.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.31.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.31.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.31.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.32.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.32.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.32.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.33.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.33.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.33.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.34.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.34.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.34.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.35.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.35.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.35.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.36.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.36.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.36.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.37.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.37.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.37.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.38.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.38.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.38.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.39.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.39.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.39.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.4.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.4.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.40.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.40.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.40.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.41.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.41.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.41.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.42.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.42.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.42.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.43.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.43.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.43.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.44.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.44.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.44.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.45.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.45.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.45.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.46.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.46.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.46.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.47.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.47.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.47.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.48.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.48.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.48.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.49.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.49.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.49.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.5.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.5.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.50.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.50.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.50.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.51.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.51.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.51.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.52.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.52.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.52.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.53.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.53.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.53.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.54.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.54.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.54.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.55.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.55.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.55.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.56.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.56.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.56.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.57.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.57.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.57.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.58.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.58.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.58.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.59.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.59.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.59.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.6.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.6.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.60.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.60.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.60.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.61.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.61.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.61.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.62.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.62.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.62.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.63.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.63.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.63.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.7.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.7.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.8.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.8.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.9.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.9.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.gate.e_score_correction_bias": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.gate.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.kv_a_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.kv_a_proj_with_mqa.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.kv_b_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.q_a_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.q_a_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.q_b_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.input_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.0.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.0.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.1.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.1.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.10.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.10.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.11.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.11.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.12.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.12.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.13.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.13.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.14.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.14.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.15.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.15.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.16.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.16.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.16.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.17.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.17.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.17.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.18.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.18.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.18.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.19.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.19.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.19.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.2.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.2.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.20.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.20.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.20.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.21.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.21.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.21.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.22.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.22.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.22.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.23.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.23.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.23.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.24.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.24.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.24.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.25.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.25.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.25.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.26.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.26.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.26.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.27.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.27.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.27.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.28.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.28.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.28.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.29.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.29.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.29.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.3.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.3.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.30.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.30.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.30.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.31.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.31.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.31.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.32.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.32.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.32.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.33.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.33.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.33.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.34.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.34.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.34.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.35.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.35.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.35.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.36.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.36.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.36.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.37.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.37.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.37.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.38.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.38.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.38.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.39.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.39.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.39.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.4.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.4.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.40.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.40.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.40.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.41.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.41.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.41.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.42.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.42.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.42.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.43.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.43.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.43.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.44.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.44.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.44.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.45.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.45.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.45.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.46.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.46.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.46.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.47.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.47.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.47.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.48.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.48.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.48.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.49.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.49.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.49.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.5.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.5.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.50.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.50.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.50.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.51.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.51.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.51.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.52.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.52.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.52.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.53.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.53.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.53.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.54.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.54.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.54.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.55.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.55.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.55.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.56.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.56.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.56.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.57.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.57.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.57.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.58.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.58.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.58.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.59.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.59.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.59.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.6.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.6.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.60.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.60.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.60.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.61.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.61.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.61.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.62.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.62.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.62.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.63.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.63.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.63.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.7.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.7.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.7.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.8.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.8.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.9.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.9.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.gate.e_score_correction_bias": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.gate.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.kv_a_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.kv_a_proj_with_mqa.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.kv_b_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.q_a_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.q_a_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.q_b_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.input_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.0.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.0.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.1.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.1.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.10.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.10.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.11.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.11.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.11.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.12.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.12.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.13.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.13.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.14.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.14.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.15.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.15.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.16.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.16.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.16.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.17.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.17.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.17.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.18.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.18.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.18.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.19.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.19.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.19.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.2.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.2.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.20.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.20.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.20.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.21.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.21.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.21.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.22.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.22.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.22.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.23.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.23.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.23.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.24.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.24.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.24.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.25.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.25.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.25.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.26.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.26.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.26.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.27.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.27.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.27.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.28.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.28.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.28.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.29.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.29.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.29.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.3.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.3.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.30.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.30.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.30.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.31.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.31.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.31.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.32.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.32.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.32.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.33.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.33.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.33.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.34.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.34.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.34.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.35.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.35.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.35.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.36.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.36.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.36.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.37.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.37.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.37.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.38.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.38.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.38.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.39.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.39.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.39.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.4.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.4.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.40.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.40.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.40.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.41.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.41.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.41.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.42.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.42.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.42.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.43.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.43.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.43.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.44.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.44.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.44.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.45.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.45.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.45.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.46.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.46.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.46.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.47.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.47.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.47.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.48.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.48.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.48.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.49.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.49.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.49.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.5.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.5.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.50.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.50.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.50.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.51.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.51.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.51.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.52.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.52.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.52.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.53.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.53.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.53.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.54.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.54.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.54.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.55.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.55.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.55.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.56.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.56.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.56.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.57.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.57.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.57.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.58.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.58.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.58.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.59.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.59.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.59.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.6.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.6.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.60.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.60.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.60.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.61.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.61.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.61.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.62.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.62.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.62.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.63.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.63.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.63.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.7.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.7.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.8.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.8.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.9.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.9.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.gate.e_score_correction_bias": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.gate.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.kv_a_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.kv_a_proj_with_mqa.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.kv_b_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.q_a_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.q_a_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.q_b_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.input_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.0.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.0.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.1.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.1.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.10.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.10.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.11.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.11.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.12.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.12.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.13.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.13.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.14.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.14.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.15.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.15.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.16.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.16.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.16.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.17.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.17.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.17.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.18.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.18.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.18.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.19.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.19.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.19.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.2.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.2.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.20.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.20.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.20.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.21.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.21.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.21.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.22.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.22.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.22.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.23.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.23.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.23.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.24.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.24.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.24.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.25.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.25.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.25.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.26.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.26.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.26.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.27.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.27.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.27.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.28.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.28.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.28.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.29.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.29.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.29.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.3.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.3.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.30.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.30.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.30.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.31.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.31.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.31.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.32.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.32.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.32.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.33.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.33.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.33.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.34.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.34.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.34.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.35.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.35.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.35.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.36.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.36.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.36.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.37.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.37.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.37.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.38.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.38.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.38.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.39.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.39.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.39.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.4.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.4.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.40.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.40.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.40.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.41.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.41.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.41.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.42.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.42.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.42.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.43.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.43.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.43.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.44.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.44.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.44.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.45.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.45.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.45.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.46.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.46.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.46.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.47.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.47.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.47.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.48.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.48.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.48.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.49.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.49.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.49.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.5.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.5.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.50.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.50.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.50.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.51.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.51.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.51.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.52.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.52.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.52.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.53.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.53.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.53.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.54.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.54.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.54.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.55.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.55.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.55.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.56.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.56.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.56.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.57.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.57.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.57.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.58.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.58.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.58.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.59.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.59.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.59.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.6.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.6.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.60.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.60.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.60.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.61.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.61.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.61.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.62.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.62.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.62.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.63.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.63.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.63.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.7.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.7.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.7.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.8.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.8.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.9.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.9.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.gate.e_score_correction_bias": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.gate.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.kv_a_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.kv_a_proj_with_mqa.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.kv_b_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.q_a_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.q_a_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.q_b_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.input_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.0.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.0.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.1.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.1.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.10.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.10.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.11.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.11.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.11.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.12.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.12.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.13.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.13.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.14.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.14.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.15.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.15.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.16.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.16.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.16.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.17.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.17.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.17.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.18.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.18.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.18.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.19.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.19.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.19.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.2.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.2.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.20.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.20.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.20.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.21.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.21.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.21.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.22.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.22.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.22.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.23.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.23.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.23.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.24.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.24.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.24.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.25.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.25.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.25.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.26.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.26.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.26.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.27.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.27.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.27.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.28.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.28.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.28.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.29.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.29.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.29.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.3.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.3.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.30.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.30.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.30.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.31.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.31.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.31.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.32.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.32.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.32.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.33.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.33.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.33.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.34.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.34.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.34.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.35.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.35.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.35.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.36.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.36.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.36.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.37.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.37.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.37.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.38.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.38.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.38.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.39.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.39.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.39.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.4.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.4.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.40.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.40.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.40.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.41.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.41.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.41.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.42.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.42.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.42.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.43.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.43.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.43.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.44.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.44.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.44.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.45.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.45.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.45.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.46.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.46.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.46.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.47.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.47.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.47.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.48.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.48.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.48.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.49.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.49.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.49.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.5.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.5.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.50.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.50.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.50.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.51.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.51.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.51.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.52.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.52.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.52.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.53.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.53.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.53.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.54.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.54.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.54.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.55.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.55.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.55.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.56.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.56.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.56.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.57.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.57.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.57.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.58.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.58.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.58.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.59.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.59.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.59.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.6.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.6.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.60.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.60.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.60.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.61.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.61.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.61.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.62.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.62.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.62.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.63.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.63.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.63.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.7.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.7.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.8.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.8.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.9.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.9.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.gate.e_score_correction_bias": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.gate.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.kv_a_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.kv_a_proj_with_mqa.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.kv_b_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.q_a_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.q_a_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.q_b_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.input_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.0.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.0.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.1.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.1.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.10.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.10.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.11.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.11.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.12.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.12.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.13.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.13.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.14.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.14.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.15.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.15.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.16.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.16.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.16.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.17.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.17.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.17.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.18.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.18.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.18.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.19.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.19.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.19.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.2.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.2.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.20.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.20.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.20.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.21.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.21.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.21.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.22.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.22.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.22.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.23.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.23.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.23.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.24.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.24.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.24.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.25.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.25.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.25.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.26.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.26.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.26.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.27.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.27.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.27.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.28.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.28.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.28.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.29.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.29.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.29.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.3.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.3.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.30.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.30.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.30.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.31.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.31.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.31.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.32.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.32.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.32.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.33.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.33.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.33.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.34.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.34.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.34.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.35.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.35.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.35.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.36.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.36.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.36.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.37.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.37.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.37.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.38.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.38.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.38.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.39.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.39.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.39.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.4.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.4.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.40.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.40.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.40.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.41.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.41.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.41.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.42.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.42.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.42.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.43.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.43.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.43.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.44.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.44.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.44.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.45.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.45.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.45.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.46.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.46.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.46.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.47.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.47.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.47.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.48.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.48.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.48.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.49.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.49.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.49.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.5.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.5.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.50.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.50.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.50.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.51.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.51.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.51.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.52.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.52.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.52.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.53.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.53.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.53.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.54.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.54.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.54.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.55.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.55.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.55.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.56.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.56.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.56.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.57.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.57.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.57.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.58.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.58.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.58.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.59.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.59.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.59.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.6.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.6.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.60.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.60.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.60.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.61.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.61.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.61.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.62.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.62.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.62.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.63.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.63.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.63.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.7.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.7.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.7.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.8.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.8.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.9.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.9.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.gate.e_score_correction_bias": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.gate.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.kv_a_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.kv_a_proj_with_mqa.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.kv_b_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.q_a_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.q_a_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.q_b_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.input_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.0.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.0.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.1.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.1.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.10.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.10.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.11.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.11.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.11.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.12.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.12.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.13.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.13.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.14.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.14.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.15.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.15.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.16.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.16.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.16.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.17.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.17.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.17.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.18.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.18.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.18.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.19.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.19.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.19.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.2.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.2.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.20.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.20.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.20.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.21.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.21.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.21.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.22.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.22.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.22.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.23.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.23.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.23.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.24.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.24.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.24.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.25.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.25.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.25.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.26.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.26.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.26.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.27.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.27.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.27.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.28.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.28.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.28.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.29.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.29.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.29.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.3.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.3.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.30.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.30.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.30.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.31.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.31.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.31.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.32.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.32.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.32.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.33.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.33.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.33.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.34.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.34.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.34.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.35.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.35.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.35.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.36.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.36.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.36.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.37.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.37.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.37.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.38.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.38.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.38.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.39.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.39.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.39.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.4.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.4.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.40.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.40.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.40.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.41.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.41.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.41.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.42.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.42.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.42.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.43.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.43.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.43.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.44.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.44.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.44.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.45.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.45.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.45.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.46.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.46.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.46.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.47.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.47.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.47.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.48.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.48.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.48.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.49.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.49.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.49.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.5.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.5.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.50.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.50.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.50.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.51.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.51.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.51.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.52.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.52.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.52.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.53.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.53.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.53.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.54.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.54.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.54.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.55.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.55.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.55.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.56.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.56.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.56.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.57.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.57.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.57.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.58.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.58.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.58.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.59.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.59.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.59.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.6.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.6.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.60.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.60.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.60.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.61.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.61.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.61.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.62.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.62.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.62.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.63.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.63.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.63.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.7.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.7.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.8.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.8.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.9.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.9.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.gate.e_score_correction_bias": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.gate.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.kv_a_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.kv_a_proj_with_mqa.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.kv_b_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.q_a_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.q_a_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.q_b_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.input_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.0.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.0.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.1.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.1.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.10.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.10.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.11.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.11.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.12.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.12.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.13.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.13.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.14.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.14.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.15.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.15.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.16.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.16.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.16.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.17.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.17.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.17.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.18.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.18.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.18.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.19.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.19.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.19.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.2.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.2.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.20.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.20.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.20.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.21.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.21.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.21.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.22.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.22.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.22.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.23.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.23.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.23.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.24.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.24.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.24.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.25.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.25.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.25.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.26.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.26.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.26.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.27.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.27.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.27.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.28.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.28.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.28.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.29.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.29.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.29.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.3.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.3.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.30.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.30.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.30.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.31.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.31.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.31.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.32.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.32.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.32.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.33.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.33.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.33.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.34.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.34.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.34.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.35.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.35.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.35.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.36.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.36.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.36.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.37.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.37.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.37.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.38.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.38.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.38.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.39.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.39.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.39.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.4.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.4.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.40.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.40.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.40.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.41.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.41.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.41.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.42.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.42.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.42.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.43.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.43.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.43.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.44.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.44.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.44.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.45.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.45.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.45.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.46.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.46.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.46.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.47.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.47.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.47.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.48.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.48.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.48.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.49.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.49.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.49.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.5.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.5.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.50.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.50.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.50.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.51.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.51.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.51.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.52.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.52.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.52.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.53.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.53.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.53.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.54.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.54.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.54.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.55.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.55.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.55.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.56.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.56.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.56.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.57.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.57.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.57.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.58.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.58.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.58.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.59.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.59.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.59.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.6.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.6.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.60.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.60.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.60.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.61.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.61.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.61.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.62.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.62.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.62.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.63.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.63.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.63.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.7.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.7.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.7.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.8.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.8.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.9.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.9.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.gate.e_score_correction_bias": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.gate.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.kv_a_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.kv_a_proj_with_mqa.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.kv_b_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.q_a_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.q_a_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.q_b_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.input_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.0.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.0.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.1.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.1.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.10.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.10.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.11.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.11.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.11.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.12.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.12.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.13.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.13.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.14.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.14.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.15.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.15.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.16.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.16.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.16.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.17.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.17.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.17.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.18.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.18.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.18.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.19.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.19.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.19.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.2.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.2.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.20.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.20.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.20.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.21.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.21.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.21.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.22.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.22.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.22.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.23.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.23.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.23.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.24.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.24.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.24.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.25.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.25.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.25.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.26.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.26.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.26.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.27.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.27.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.27.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.28.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.28.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.28.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.29.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.29.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.29.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.3.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.3.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.30.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.30.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.30.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.31.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.31.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.31.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.32.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.32.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.32.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.33.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.33.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.33.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.34.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.34.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.34.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.35.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.35.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.35.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.36.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.36.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.36.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.37.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.37.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.37.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.38.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.38.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.38.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.39.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.39.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.39.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.4.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.4.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.40.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.40.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.40.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.41.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.41.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.41.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.42.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.42.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.42.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.43.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.43.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.43.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.44.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.44.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.44.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.45.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.45.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.45.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.46.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.46.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.46.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.47.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.47.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.47.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.48.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.48.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.48.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.49.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.49.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.49.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.5.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.5.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.50.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.50.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.50.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.51.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.51.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.51.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.52.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.52.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.52.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.53.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.53.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.53.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.54.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.54.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.54.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.55.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.55.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.55.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.56.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.56.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.56.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.57.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.57.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.57.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.58.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.58.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.58.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.59.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.59.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.59.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.6.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.6.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.60.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.60.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.60.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.61.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.61.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.61.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.62.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.62.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.62.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.63.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.63.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.63.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.7.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.7.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.8.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.8.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.9.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.9.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.gate.e_score_correction_bias": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.gate.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.kv_a_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.kv_a_proj_with_mqa.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.kv_b_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.q_a_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.q_a_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.q_b_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.input_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.0.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.0.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.1.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.1.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.10.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.10.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.11.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.11.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.12.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.12.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.13.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.13.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.14.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.14.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.15.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.15.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.16.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.16.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.16.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.17.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.17.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.17.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.18.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.18.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.18.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.19.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.19.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.19.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.2.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.2.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.20.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.20.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.20.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.21.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.21.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.21.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.22.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.22.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.22.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.23.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.23.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.23.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.24.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.24.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.24.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.25.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.25.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.25.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.26.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.26.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.26.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.27.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.27.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.27.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.28.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.28.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.28.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.29.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.29.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.29.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.3.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.3.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.30.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.30.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.30.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.31.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.31.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.31.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.32.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.32.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.32.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.33.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.33.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.33.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.34.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.34.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.34.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.35.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.35.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.35.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.36.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.36.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.36.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.37.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.37.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.37.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.38.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.38.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.38.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.39.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.39.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.39.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.4.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.4.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.40.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.40.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.40.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.41.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.41.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.41.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.42.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.42.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.42.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.43.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.43.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.43.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.44.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.44.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.44.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.45.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.45.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.45.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.46.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.46.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.46.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.47.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.47.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.47.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.48.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.48.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.48.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.49.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.49.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.49.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.5.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.5.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.50.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.50.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.50.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.51.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.51.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.51.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.52.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.52.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.52.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.53.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.53.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.53.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.54.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.54.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.54.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.55.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.55.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.55.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.56.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.56.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.56.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.57.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.57.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.57.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.58.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.58.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.58.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.59.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.59.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.59.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.6.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.6.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.60.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.60.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.60.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.61.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.61.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.61.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.62.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.62.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.62.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.63.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.63.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.63.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.7.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.7.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.7.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.8.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.8.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.9.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.9.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.gate.e_score_correction_bias": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.gate.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.kv_a_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.kv_a_proj_with_mqa.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.kv_b_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.q_a_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.q_a_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.q_b_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.input_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.0.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.0.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.1.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.1.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.10.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.10.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.11.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.11.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.11.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.12.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.12.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.13.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.13.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.14.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.14.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.15.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.15.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.16.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.16.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.16.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.17.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.17.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.17.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.18.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.18.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.18.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.19.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.19.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.19.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.2.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.2.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.20.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.20.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.20.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.21.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.21.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.21.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.22.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.22.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.22.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.23.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.23.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.23.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.24.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.24.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.24.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.25.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.25.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.25.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.26.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.26.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.26.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.27.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.27.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.27.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.28.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.28.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.28.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.29.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.29.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.29.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.3.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.3.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.30.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.30.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.30.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.31.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.31.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.31.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.32.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.32.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.32.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.33.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.33.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.33.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.34.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.34.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.34.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.35.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.35.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.35.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.36.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.36.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.36.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.37.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.37.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.37.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.38.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.38.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.38.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.39.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.39.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.39.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.4.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.4.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.40.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.40.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.40.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.41.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.41.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.41.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.42.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.42.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.42.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.43.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.43.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.43.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.44.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.44.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.44.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.45.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.45.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.45.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.46.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.46.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.46.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.47.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.47.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.47.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.48.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.48.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.48.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.49.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.49.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.49.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.5.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.5.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.50.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.50.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.50.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.51.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.51.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.51.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.52.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.52.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.52.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.53.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.53.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.53.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.54.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.54.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.54.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.55.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.55.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.55.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.56.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.56.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.56.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.57.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.57.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.57.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.58.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.58.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.58.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.59.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.59.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.59.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.6.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.6.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.60.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.60.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.60.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.61.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.61.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.61.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.62.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.62.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.62.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.63.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.63.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.63.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.7.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.7.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.8.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.8.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.9.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.9.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.gate.e_score_correction_bias": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.gate.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.kv_a_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.kv_a_proj_with_mqa.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.kv_b_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.q_a_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.q_a_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.q_b_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.input_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.0.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.0.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.1.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.1.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.10.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.10.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.11.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.11.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.12.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.12.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.13.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.13.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.14.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.14.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.15.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.15.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.16.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.16.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.16.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.17.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.17.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.17.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.18.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.18.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.18.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.19.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.19.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.19.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.2.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.2.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.20.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.20.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.20.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.21.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.21.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.21.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.22.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.22.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.22.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.23.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.23.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.23.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.24.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.24.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.24.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.25.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.25.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.25.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.26.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.26.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.26.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.27.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.27.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.27.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.28.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.28.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.28.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.29.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.29.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.29.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.3.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.3.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.30.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.30.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.30.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.31.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.31.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.31.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.32.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.32.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.32.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.33.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.33.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.33.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.34.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.34.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.34.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.35.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.35.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.35.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.36.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.36.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.36.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.37.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.37.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.37.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.38.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.38.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.38.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.39.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.39.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.39.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.4.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.4.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.40.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.40.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.40.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.41.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.41.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.41.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.42.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.42.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.42.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.43.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.43.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.43.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.44.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.44.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.44.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.45.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.45.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.45.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.46.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.46.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.46.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.47.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.47.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.47.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.48.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.48.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.48.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.49.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.49.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.49.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.5.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.5.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.50.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.50.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.50.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.51.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.51.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.51.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.52.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.52.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.52.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.53.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.53.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.53.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.54.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.54.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.54.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.55.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.55.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.55.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.56.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.56.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.56.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.57.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.57.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.57.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.58.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.58.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.58.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.59.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.59.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.59.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.6.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.6.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.60.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.60.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.60.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.61.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.61.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.61.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.62.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.62.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.62.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.63.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.63.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.63.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.7.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.7.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.7.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.8.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.8.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.9.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.9.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.gate.e_score_correction_bias": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.gate.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.kv_a_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.kv_a_proj_with_mqa.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.kv_b_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.q_a_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.q_a_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.q_b_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.input_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.0.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.0.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.1.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.1.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.10.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.10.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.11.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.11.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.11.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.12.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.12.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.13.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.13.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.14.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.14.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.15.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.15.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.16.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.16.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.16.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.17.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.17.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.17.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.18.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.18.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.18.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.19.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.19.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.19.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.2.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.2.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.20.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.20.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.20.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.21.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.21.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.21.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.22.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.22.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.22.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.23.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.23.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.23.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.24.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.24.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.24.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.25.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.25.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.25.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.26.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.26.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.26.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.27.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.27.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.27.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.28.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.28.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.28.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.29.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.29.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.29.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.3.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.3.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.30.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.30.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.30.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.31.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.31.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.31.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.32.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.32.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.32.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.33.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.33.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.33.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.34.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.34.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.34.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.35.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.35.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.35.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.36.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.36.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.36.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.37.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.37.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.37.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.38.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.38.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.38.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.39.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.39.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.39.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.4.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.4.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.40.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.40.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.40.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.41.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.41.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.41.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.42.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.42.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.42.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.43.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.43.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.43.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.44.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.44.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.44.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.45.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.45.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.45.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.46.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.46.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.46.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.47.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.47.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.47.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.48.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.48.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.48.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.49.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.49.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.49.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.5.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.5.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.50.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.50.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.50.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.51.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.51.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.51.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.52.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.52.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.52.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.53.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.53.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.53.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.54.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.54.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.54.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.55.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.55.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.55.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.56.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.56.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.56.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.57.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.57.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.57.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.58.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.58.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.58.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.59.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.59.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.59.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.6.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.6.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.60.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.60.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.60.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.61.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.61.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.61.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.62.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.62.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.62.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.63.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.63.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.63.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.7.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.7.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.8.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.8.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.9.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.9.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.gate.e_score_correction_bias": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.gate.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.kv_a_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.kv_a_proj_with_mqa.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.kv_b_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.q_a_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.q_a_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.q_b_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.input_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.0.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.0.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.1.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.1.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.10.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.10.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.11.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.11.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.12.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.12.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.13.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.13.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.14.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.14.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.15.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.15.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.16.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.16.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.16.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.17.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.17.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.17.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.18.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.18.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.18.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.19.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.19.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.19.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.2.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.2.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.20.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.20.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.20.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.21.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.21.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.21.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.22.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.22.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.22.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.23.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.23.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.23.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.24.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.24.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.24.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.25.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.25.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.25.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.26.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.26.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.26.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.27.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.27.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.27.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.28.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.28.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.28.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.29.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.29.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.29.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.3.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.3.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.30.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.30.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.30.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.31.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.31.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.31.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.32.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.32.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.32.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.33.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.33.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.33.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.34.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.34.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.34.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.35.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.35.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.35.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.36.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.36.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.36.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.37.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.37.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.37.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.38.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.38.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.38.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.39.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.39.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.39.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.4.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.4.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.40.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.40.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.40.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.41.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.41.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.41.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.42.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.42.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.42.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.43.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.43.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.43.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.44.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.44.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.44.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.45.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.45.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.45.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.46.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.46.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.46.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.47.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.47.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.47.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.48.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.48.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.48.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.49.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.49.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.49.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.5.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.5.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.50.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.50.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.50.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.51.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.51.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.51.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.52.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.52.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.52.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.53.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.53.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.53.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.54.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.54.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.54.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.55.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.55.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.55.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.56.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.56.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.56.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.57.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.57.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.57.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.58.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.58.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.58.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.59.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.59.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.59.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.6.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.6.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.60.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.60.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.60.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.61.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.61.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.61.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.62.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.62.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.62.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.63.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.63.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.63.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.7.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.7.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.7.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.8.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.8.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.9.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.9.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.gate.e_score_correction_bias": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.gate.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.kv_a_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.kv_a_proj_with_mqa.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.kv_b_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.q_a_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.q_a_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.q_b_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.input_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.0.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.0.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.1.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.1.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.10.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.10.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.11.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.11.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.11.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.12.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.12.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.13.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.13.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.14.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.14.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.15.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.15.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.16.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.16.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.16.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.17.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.17.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.17.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.18.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.18.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.18.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.19.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.19.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.19.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.2.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.2.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.20.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.20.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.20.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.21.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.21.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.21.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.22.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.22.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.22.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.23.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.23.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.23.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.24.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.24.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.24.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.25.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.25.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.25.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.26.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.26.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.26.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.27.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.27.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.27.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.28.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.28.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.28.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.29.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.29.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.29.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.3.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.3.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.30.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.30.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.30.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.31.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.31.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.31.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.32.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.32.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.32.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.33.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.33.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.33.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.34.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.34.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.34.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.35.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.35.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.35.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.36.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.36.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.36.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.37.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.37.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.37.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.38.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.38.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.38.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.39.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.39.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.39.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.4.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.4.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.40.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.40.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.40.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.41.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.41.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.41.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.42.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.42.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.42.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.43.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.43.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.43.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.44.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.44.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.44.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.45.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.45.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.45.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.46.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.46.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.46.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.47.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.47.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.47.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.48.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.48.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.48.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.49.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.49.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.49.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.5.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.5.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.50.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.50.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.50.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.51.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.51.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.51.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.52.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.52.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.52.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.53.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.53.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.53.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.54.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.54.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.54.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.55.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.55.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.55.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.56.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.56.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.56.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.57.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.57.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.57.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.58.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.58.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.58.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.59.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.59.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.59.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.6.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.6.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.60.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.60.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.60.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.61.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.61.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.61.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.62.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.62.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.62.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.63.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.63.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.63.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.7.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.7.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.8.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.8.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.9.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.9.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.gate.e_score_correction_bias": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.gate.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.kv_a_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.kv_a_proj_with_mqa.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.kv_b_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.q_a_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.q_a_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.q_b_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.input_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.0.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.0.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.1.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.1.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.10.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.10.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.11.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.11.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.12.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.12.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.13.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.13.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.14.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.14.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.15.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.15.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.16.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.16.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.16.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.17.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.17.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.17.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.18.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.18.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.18.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.19.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.19.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.19.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.2.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.2.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.20.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.20.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.20.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.21.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.21.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.21.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.22.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.22.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.22.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.23.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.23.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.23.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.24.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.24.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.24.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.25.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.25.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.25.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.26.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.26.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.26.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.27.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.27.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.27.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.28.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.28.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.28.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.29.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.29.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.29.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.3.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.3.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.30.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.30.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.30.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.31.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.31.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.31.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.32.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.32.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.32.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.33.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.33.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.33.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.34.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.34.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.34.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.35.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.35.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.35.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.36.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.36.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.36.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.37.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.37.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.37.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.38.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.38.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.38.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.39.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.39.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.39.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.4.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.4.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.40.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.40.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.40.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.41.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.41.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.41.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.42.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.42.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.42.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.43.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.43.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.43.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.44.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.44.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.44.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.45.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.45.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.45.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.46.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.46.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.46.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.47.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.47.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.47.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.48.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.48.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.48.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.49.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.49.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.49.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.5.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.5.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.50.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.50.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.50.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.51.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.51.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.51.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.52.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.52.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.52.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.53.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.53.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.53.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.54.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.54.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.54.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.55.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.55.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.55.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.56.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.56.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.56.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.57.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.57.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.57.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.58.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.58.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.58.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.59.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.59.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.59.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.6.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.6.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.60.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.60.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.60.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.61.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.61.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.61.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.62.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.62.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.62.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.63.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.63.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.63.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.7.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.7.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.7.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.8.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.8.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.9.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.9.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.gate.e_score_correction_bias": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.gate.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.kv_a_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.kv_a_proj_with_mqa.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.kv_b_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.q_a_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.q_a_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.q_b_proj.weight": "model-00046-of-00048.safetensors", + "lm_head.weight": "model-00047-of-00048.safetensors", + "model.layers.46.input_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.0.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.0.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.1.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.1.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.10.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.10.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.11.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.11.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.12.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.12.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.13.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.13.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.14.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.14.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.15.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.15.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.16.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.16.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.17.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.17.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.18.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.18.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.19.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.19.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.2.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.2.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.20.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.20.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.21.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.21.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.22.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.22.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.23.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.23.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.24.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.24.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.25.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.25.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.26.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.26.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.27.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.27.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.28.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.28.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.29.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.29.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.3.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.3.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.30.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.30.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.31.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.31.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.32.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.32.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.33.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.33.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.33.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.34.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.34.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.35.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.35.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.36.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.36.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.37.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.37.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.38.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.38.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.39.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.39.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.4.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.4.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.40.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.40.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.41.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.41.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.42.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.42.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.43.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.43.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.44.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.44.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.45.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.45.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.46.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.46.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.47.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.47.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.48.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.48.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.49.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.49.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.5.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.5.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.50.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.50.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.51.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.51.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.52.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.52.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.53.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.53.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.54.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.54.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.55.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.55.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.55.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.56.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.56.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.57.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.57.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.58.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.58.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.59.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.59.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.6.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.6.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.60.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.60.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.61.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.61.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.62.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.62.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.63.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.63.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.7.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.7.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.8.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.8.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.9.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.9.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.gate.e_score_correction_bias": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.gate.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.kv_a_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.kv_a_proj_with_mqa.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.kv_b_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.q_a_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.q_a_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.q_b_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.47.shared_head.head.weight": "model-00047-of-00048.safetensors", + "model.norm.weight": "model-00047-of-00048.safetensors", + "model.layers.47.eh_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.enorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.hnorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.input_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.0.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.0.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.1.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.1.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.10.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.10.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.11.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.11.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.12.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.12.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.13.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.13.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.14.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.14.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.15.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.15.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.16.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.16.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.16.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.17.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.17.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.17.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.18.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.18.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.18.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.19.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.19.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.19.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.2.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.2.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.20.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.20.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.20.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.21.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.21.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.21.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.22.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.22.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.22.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.23.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.23.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.23.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.24.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.24.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.24.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.25.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.25.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.25.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.26.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.26.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.26.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.27.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.27.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.27.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.28.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.28.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.28.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.29.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.29.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.29.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.3.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.3.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.30.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.30.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.30.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.31.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.31.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.31.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.32.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.32.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.32.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.33.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.33.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.33.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.34.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.34.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.34.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.35.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.35.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.35.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.36.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.36.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.36.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.37.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.37.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.37.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.38.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.38.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.38.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.39.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.39.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.39.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.4.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.4.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.40.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.40.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.40.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.41.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.41.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.41.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.42.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.42.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.42.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.43.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.43.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.43.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.44.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.44.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.44.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.45.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.45.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.45.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.46.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.46.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.46.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.47.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.47.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.47.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.48.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.48.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.48.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.49.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.49.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.49.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.5.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.5.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.50.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.50.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.50.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.51.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.51.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.51.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.52.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.52.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.52.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.53.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.53.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.53.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.54.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.54.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.54.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.55.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.55.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.55.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.56.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.56.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.56.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.57.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.57.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.57.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.58.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.58.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.58.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.59.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.59.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.59.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.6.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.6.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.60.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.60.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.60.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.61.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.61.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.61.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.62.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.62.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.62.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.63.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.63.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.63.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.7.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.7.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.7.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.8.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.8.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.9.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.9.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.gate.e_score_correction_bias": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.gate.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.kv_a_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.kv_a_proj_with_mqa.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.kv_b_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.q_a_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.q_a_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.q_b_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.shared_head.norm.weight": "model-00048-of-00048.safetensors" + } +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..aba40197a4cdb5607f4ab7a05fb0a4ee8054fd6d --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e773648cb4e65de8660ea6365e10acca112d42a854923df93db4a6f333a82d +size 20217442 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..67bcf4bba8f1f876d736356bd0f8b107a88eacce --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,36 @@ +{ + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>" + ], + "is_local": false, + "model_max_length": 128000, + "model_specific_special_tokens": {}, + "pad_token": "[MASK]", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "TokenizersBackend", + "unk_token": null, + "chat_template": "[gMASK]\n{%- if tools -%}\n<|system|>\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n{% for tool in tools %}\n{{ tool | tojson(ensure_ascii=False) }}\n{% endfor %}\n\n\nFor each function call, output the function name and arguments within the following XML format:\n{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{- content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{% for m in messages %}\n{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}\n{%- elif m.role == 'assistant' -%}\n<|assistant|>\n{%- set reasoning_content = '' %}\n{%- set content = visible_text(m.content) %}\n{%- if m.reasoning_content is string %}\n {%- set reasoning_content = m.reasoning_content %}\n{%- else %}\n {%- if '' in content %}\n {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- set content = content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n{%- endif %}\n{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}\n{{ '' + reasoning_content.strip() + ''}}\n{%- else -%}\n{{ '' }}\n{%- endif -%}\n{%- if content.strip() -%}\n{{ content.strip() }}\n{%- endif -%}\n{% if m.tool_calls %}\n{% for tc in m.tool_calls %}\n{%- if tc.function %}\n {%- set tc = tc.function %}\n{%- endif %}\n{{- '' + tc.name -}}\n{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %}\n{% endif %}\n{%- elif m.role == 'tool' -%}\n{%- if m.content is string -%}\n{%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|observation|>' }}\n{%- endif %}\n{{- '' }}\n{{- m.content }}\n{{- '' }}\n{%- else -%}\n<|observation|>{% for tr in m.content %}\n{{ tr.output if tr.output is defined else tr }}{% endfor -%}\n{% endif -%}\n{%- elif m.role == 'system' -%}\n<|system|>{{ visible_text(m.content) }}\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}}\n{%- endif -%}" +} \ No newline at end of file