diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..2ab98ef068d62829d17c5ade1827b9f013fa2bbf --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,86 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name}{arg-key-1}{arg-value-1}{arg-key-2}{arg-value-2}...{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%} +{{ '' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '' }} +{%- endif -%} +{%- if content.strip() -%} +{{ content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{- '' + tc.name -}} +{% set _args = tc.arguments %}{% for k, v in _args.items() %}{{ k }}{{ v | tojson(ensure_ascii=False) if v is not string else v }}{% endfor %}{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '' }} +{{- m.content }} +{{- '' }} +{%- else -%} +<|observation|>{% for tr in m.content %} +{{ tr.output if tr.output is defined else tr }}{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|>{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b829f085bc11b78d94e481aa8c57bcc803dd249 --- /dev/null +++ b/config.json @@ -0,0 +1,104 @@ +{ + "architectures": [ + "Glm4MoeLiteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 0, + "dtype": "bfloat16", + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "first_k_dense_replace": 1, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 10240, + "kv_lora_rank": 512, + "max_position_embeddings": 202752, + "mlp_layer_types": [ + "dense", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse", + "sparse" + ], + "model_type": "glm4_moe_lite", + "moe_intermediate_size": 1536, + "n_group": 1, + "n_routed_experts": 64, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 20, + "num_experts_per_tok": 4, + "num_hidden_layers": 47, + "num_key_value_heads": 20, + "num_nextn_predict_layers": 1, + "pad_token_id": 154820, + "partial_rotary_factor": 1.0, + "pretraining_tp": 1, + "q_lora_rank": 768, + "qk_head_dim": 256, + "qk_nope_head_dim": 192, + "qk_rope_head_dim": 64, + "rms_norm_eps": 1e-05, + "rope_interleave": true, + "rope_parameters": { + "partial_rotary_factor": 1.0, + "rope_theta": 1000000, + "rope_type": "default" + }, + "routed_scaling_factor": 1.8, + "tie_word_embeddings": false, + "topk_group": 1, + "topk_method": "noaux_tc", + "transformers_version": "5.3.0.dev0", + "use_cache": true, + "v_head_dim": 256, + "vocab_size": 154880 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..490ceb7e16d582fd69c70ea2d81186049f730dd7 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "eos_token_id": [ + 154820, + 154827, + 154829 + ], + "pad_token_id": 154820, + "temperature": 1.0, + "transformers_version": "5.3.0.dev0" +} diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..898ac23a5df8d67cdec4e5bfc4636d1c9e07dcd8 --- /dev/null +++ b/model-00001-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1711e636b93cd96a70b5369e77d5812d5a69b8c02404f8eb217aa7c7c9050ab +size 1840799760 diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..476ee8a252d216252d440248168f6a437906235f --- /dev/null +++ b/model-00002-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c393b8a7f63532e896d691cee8c9f6733a4d37faa8db270e26cbec1275eddd1 +size 1270648160 diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..309c0b50273604ae4c035afb95457dad72809f87 --- /dev/null +++ b/model-00003-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:912108601767ec5cd692db307f3d1c282271b52e46e76651693fdf9278aa8135 +size 1270648160 diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..738bfee434c448f816b80bb1fdb92a72a93e6942 --- /dev/null +++ b/model-00004-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba422aa18de814c4661419aa9d7af9c11b82ceb5a31e0c2d5700b40981a40a6e +size 1270648160 diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6433d12997bb9919e1d80c57c8f9b8e0d8af9671 --- /dev/null +++ b/model-00005-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4e3af670d9eec2ff20356da13dc06e7a1984d52901ab45fffa4e8dabaeabce +size 1270648160 diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47184c3bf9b5af0f2a81e8ad0a767379aa2b36c2 --- /dev/null +++ b/model-00006-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e78a3b8839ef55ff29eb0a8fb1a609d5c8bce09232150ffca3caa3a8cc6f32 +size 1270648160 diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..925e7b74a0c1677dc6a2d95ff47b325933b747fc --- /dev/null +++ b/model-00007-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a489c47f96676b2814ab14ff427d5680d23211074203c88870ad98f6d954f21 +size 1270648160 diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9df9fe25d524ce776f8e66bddbb8011bc17d5b8 --- /dev/null +++ b/model-00008-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f6f092c634e22b79cbe87b6a550d354a144adefb4cb1a199dc818b69107634b +size 1270648160 diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..154c46297d8a5ace065f6bc2b5fd4aa8d76d9015 --- /dev/null +++ b/model-00009-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420c61c0a1efe2fbd04d439f1c0a72dca3d38eb8d9e4ff885f4d6f1c1feb22a1 +size 1270648160 diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b6c1ca7b2a6283877aa9b785dacd93d794e95ab --- /dev/null +++ b/model-00010-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc3de3a234f83572bfbf75cf4cfeecee7f41d75eeeed3ce6904f7e69ee69471 +size 1270648224 diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb50ba5b8e4ba65b2a38c496872e0968387f04e7 --- /dev/null +++ b/model-00011-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de5a4d860e7a6e51c51a06e5c2abf1c1c6f7d0f0baa8130cbd166c6d6499a428 +size 1270648368 diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ff511867cbd89a5a99bc3b98238e49910e335a6 --- /dev/null +++ b/model-00012-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625b1fddc7dfe80d5494ecd7652d44dca94fad4d39e9058b1c5a0d1187dcd6c7 +size 1270648368 diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e58f80d108f8c5f6aad2f60cde2c031d0bf1ccec --- /dev/null +++ b/model-00013-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f67b97ebb8b9a4c18ae4cc4e389cec37be12509bfd36572f0a7385fa2675fe +size 1270648368 diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e0ac43cd387e346031ad778212106c5589e223a --- /dev/null +++ b/model-00014-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa962aa9e262bc3dada85f401045e87bb4fb77f8d8b654e6cae0da6c67aa6ec +size 1270648368 diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed5196a9c8887f77708b3b1d4d7bb37dcfdfec11 --- /dev/null +++ b/model-00015-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ae54580fc7ab38fdf838125f29965b27ee8e365cf9ffc2c35275f5b0cbdfed +size 1270648368 diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e6212681be551d53c106e922255bf2d5388dda8 --- /dev/null +++ b/model-00016-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2224085be76e891a7bbe94375fe8a420ee7d413edda5175dddc4063658fc7fc1 +size 1270648368 diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5a4c9116d18eac527713088de73cc7a2cbec7df3 --- /dev/null +++ b/model-00017-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1ab4823d652d0b8ca1397323ee6228db31d63ff75c75a737c96dd25f092321 +size 1270648368 diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e3fa49656677e1a5a78667aea681ea2769752de --- /dev/null +++ b/model-00018-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875768b0b0e3ff603506549d5cb51317667f5542c03e8a7a9f0f768e070bf5d4 +size 1270648368 diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7afc501bbb046808797d4ddf15e2542be8f4ba9c --- /dev/null +++ b/model-00019-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13993ed2791b378e6e3d06e58ae1f635152e5e5944243e8c32ad40ceaebf6d13 +size 1270648368 diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8130308706479bd6fdcd9392c10ffbeda771a6f0 --- /dev/null +++ b/model-00020-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d8c02857736ba7c0cb401032a3ab54376830f0f971d909fd065cc679381c63 +size 1270648368 diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04b9e3e353312f4f96419d3db600800c099faad7 --- /dev/null +++ b/model-00021-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f08b1f5527bed6c7394792c47422ea5bdbe2de80fefd297741c83d3974c423 +size 1270648368 diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3438744b493246ff83a15e1431c1c8e7bfcc50c3 --- /dev/null +++ b/model-00022-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84983d1bec3111ec3871811e4579816de8a65f818a7a78e0b75f90204ad2fb6d +size 1270648368 diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..104cb40033c365567664a6a7835e9ce7c8dbc7bb --- /dev/null +++ b/model-00023-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2fa8369e7cba388cd3c37ce589712a78d4f4ad1bb1694a13ff801053459663 +size 1270648368 diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff133e5986fcdc5a6a6cb18528afe0aefb1f5aa3 --- /dev/null +++ b/model-00024-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404b515cea79c23c4f837f5b6766a0bb465ecea3970013947f8e7bd3191ea476 +size 1270648368 diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c3b7cf5b70ea6e80748c5c1da371ab352e54f7c --- /dev/null +++ b/model-00025-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5685fe2555f51d541deac49c0f2081b53bdd2445e848b5dedcd92714e4b24f8 +size 1270648368 diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c05de620d19fe538f222e7ca0aae127cb4db10c0 --- /dev/null +++ b/model-00026-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d05d6cd13b54a20881d77b2c6292917970dad5c3aad07c88f4a65284e857ea +size 1270648368 diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96d09a341d286f00db2c6ee0abb958448de73128 --- /dev/null +++ b/model-00027-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08789a4ee8392f9988456a98d4c245888ca45c4212202fc560c57e27d7961083 +size 1270648368 diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9111a1ac8b3c8d2ffce34aebd424cdec69435c7b --- /dev/null +++ b/model-00028-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9300215f51516694b152e575bc37c5fe69d5844aaa7236e0b9ae8861a59802 +size 1270648368 diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36fdee18dfde72a9fa660f57aca448f191c26a42 --- /dev/null +++ b/model-00029-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faac777d4323700dd06e65e7d42fb99e2c0705facc30e36fc1067e86b2ed9f3a +size 1270648368 diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..297524eb4c269b67008aa619cb921ea082055100 --- /dev/null +++ b/model-00030-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a07deac3c72b4e963d229a4458af4a2369554d4dd7675281f0c9e55a2616067 +size 1270648368 diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21fd1c926f49708eb4c3f3f0a8d6bfb18186da84 --- /dev/null +++ b/model-00031-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea2a8c0f4f9d4c044a567843f44e2f83bf56a42f5908962afb846fbde0ef96e +size 1270648368 diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9be77f4968ec6569e546271afd8f7257924509a1 --- /dev/null +++ b/model-00032-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7de6cc93bdb378f5395cd293ab2e718970425a2f2550fbfefa1f0173d68f36e +size 1270648368 diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a648c1df9de67e527b4d46e253f576cc85ba88d4 --- /dev/null +++ b/model-00033-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29aa7f7f1a2a35fabd3a9b122f6858e447c20cbdd4b821de0dd4407ba4b9d968 +size 1270648368 diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0829d251be87b79448500652c9461eccc65d8cb7 --- /dev/null +++ b/model-00034-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0271b14f35f1b291d7221b121c545810ce4da9874d4b6d10a14aee21f0a612 +size 1270648368 diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b75fc11c2aacd0066c5d7428b093fc7073993d2 --- /dev/null +++ b/model-00035-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4f8c52a7f535a07f041bd381803c5518ba14c3c63b59574b951006bd7bebd5 +size 1270648368 diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16d90e0cef2ac72fe3b13c40332e86779bc220dc --- /dev/null +++ b/model-00036-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b243584cfd3347dfd48fa48381b670d9bbea28cb7719ef1662090d64620c2f94 +size 1270648368 diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5fbefc8f751ce9be7d1138b149554aaeeeb0a62 --- /dev/null +++ b/model-00037-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374fb4eb2ae8bfdc5b053dde96d960c5363397188515e0384e460fa2cbe0b1d4 +size 1270648368 diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd0072bc1bf022c42c4f36b727786423b963e808 --- /dev/null +++ b/model-00038-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6b8be445181c7b6f7506f286dabca5f7b5f05f8d4d72ea75b08a0b0a0d2b14 +size 1270648368 diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57aea44a756fb6b755fa279ab750429325e7d3ca --- /dev/null +++ b/model-00039-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3183c2020795861e933490f838406041fce44de1831d1bb22d35ffc5bcdacbe +size 1270648368 diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67e7fe8410a0df91f9be0bf1b8ae99991396e7e7 --- /dev/null +++ b/model-00040-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b50803dee654d7aecaa1210c9a702e0eca09eae4f3e7d68545ee8ddd771a5d6 +size 1270648368 diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..921c4fc6d96595b381c3f8bd12e79b2314d175b8 --- /dev/null +++ b/model-00041-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d9fa7534ea550470eb368b3558ccdf864713425131dc11a30200c3cc042885 +size 1270648368 diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..daa0aded8abc9b9167a5f61d7e8bb43d3f00db76 --- /dev/null +++ b/model-00042-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b34b3870c5299daa31e4023142a3c0887559c397219dfd3275aa71c0aa775b5d +size 1270648368 diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14cb4bc7905d591bac7cf0ce64fec69245e46487 --- /dev/null +++ b/model-00043-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8851dc03d7d6c6a21ab73eafd863e3235727a65373e7ac0342cd01a51108d8f8 +size 1270648368 diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0cf751bec925f0d57636f1eb552790039d1d8a4 --- /dev/null +++ b/model-00044-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e3c07e569045948b6257b71be7a1118c76a4d6f77dd931e293bb6079017702 +size 1270648368 diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83c4b7eed2cc49ffcb49a73462cdfb2b52d1beff --- /dev/null +++ b/model-00045-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27248f545ad05aeac8a9df4c6b781c7dfa61d2d601712c9f69593be85754992e +size 1270648368 diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..438bf947807e61bb954aa932c8cfbe745f3dedac --- /dev/null +++ b/model-00046-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a592abeb4f2d83a0237fe4fba4823a2fead0ca0a623692136065846b282c1187 +size 1270648368 diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a85070405643e43531b52f7416090360905a59e0 --- /dev/null +++ b/model-00047-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16bbbaff5890b8c82ca3637231aab9a87f9b9236979028538c9e0a7b2ed86626 +size 867987152 diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0ce2e0f0dcfd201264f52c86e8ada3b4b67973b --- /dev/null +++ b/model-00048-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5e76856a6f306161e17baefb92e226465aba0841d9638513dc056b8cfb39ff +size 2556215696 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..8729c4c2d3e549551de1ebeb824d28f565e2a012 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,9711 @@ +{ + "metadata": { + "total_parameters": 29943390976, + "total_size": 62444177360 + }, + "weight_map": { + "lm_head.weight": "model-00001-of-00048.safetensors", + "model.embed_tokens.weight": "model-00001-of-00048.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.kv_a_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.kv_a_proj_with_mqa.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.kv_b_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_a_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_a_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_b_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.0.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.0.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.0.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.1.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.1.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.1.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.10.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.10.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.10.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.11.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.11.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.11.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.12.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.12.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.12.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.13.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.13.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.13.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.14.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.14.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.14.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.15.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.15.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.15.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.16.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.16.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.16.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.17.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.17.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.17.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.18.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.18.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.18.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.19.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.19.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.19.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.2.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.2.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.2.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.20.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.20.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.20.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.21.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.21.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.21.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.22.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.22.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.22.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.23.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.23.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.23.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.24.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.24.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.24.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.25.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.25.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.25.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.26.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.26.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.26.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.27.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.27.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.27.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.28.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.28.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.28.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.29.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.29.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.29.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.3.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.3.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.3.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.30.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.30.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.30.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.31.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.31.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.31.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.32.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.32.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.32.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.33.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.33.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.33.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.34.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.34.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.34.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.35.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.35.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.35.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.36.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.36.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.36.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.37.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.37.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.37.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.38.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.38.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.38.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.39.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.39.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.39.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.4.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.4.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.4.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.40.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.40.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.40.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.41.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.41.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.41.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.42.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.42.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.42.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.43.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.43.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.43.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.44.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.44.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.44.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.45.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.45.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.45.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.46.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.46.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.46.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.47.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.47.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.47.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.48.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.48.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.48.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.49.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.49.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.49.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.5.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.5.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.5.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.50.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.50.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.50.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.51.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.51.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.51.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.52.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.52.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.52.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.53.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.53.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.53.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.54.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.54.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.54.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.55.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.55.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.55.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.56.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.56.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.56.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.57.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.57.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.57.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.58.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.58.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.58.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.59.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.59.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.59.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.6.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.6.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.6.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.60.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.60.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.60.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.61.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.61.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.61.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.62.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.62.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.62.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.63.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.63.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.63.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.7.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.7.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.7.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.8.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.8.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.8.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.9.down_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.mlp.experts.9.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.experts.9.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.gate.e_score_correction_bias": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.gate.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.shared_experts.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.shared_experts.gate_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.mlp.shared_experts.up_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.kv_a_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.kv_a_proj_with_mqa.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.kv_b_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_a_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_a_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_b_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.10.input_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.13.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.18.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.20.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.22.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.29.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.3.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.33.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.35.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.42.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.44.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.48.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.51.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.57.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.63.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.gate.e_score_correction_bias": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.gate.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.kv_a_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.kv_a_proj_with_mqa.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.kv_b_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.q_a_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.q_a_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.10.self_attn.q_b_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.input_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.0.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.15.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.30.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.45.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.60.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.gate.e_score_correction_bias": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.gate.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.kv_a_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.kv_a_proj_with_mqa.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.kv_b_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.q_a_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.q_a_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.11.self_attn.q_b_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.input_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.0.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.0.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.1.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.1.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.10.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.10.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.11.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.11.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.12.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.12.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.13.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.13.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.14.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.14.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.15.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.15.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.16.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.16.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.17.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.17.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.18.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.18.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.19.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.19.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.2.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.2.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.20.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.20.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.21.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.21.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.22.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.22.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.23.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.23.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.24.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.24.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.25.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.25.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.26.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.26.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.27.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.27.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.28.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.28.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.29.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.29.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.3.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.3.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.30.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.30.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.31.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.31.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.32.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.32.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.33.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.33.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.34.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.34.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.35.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.35.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.36.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.36.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.37.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.37.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.38.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.38.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.39.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.39.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.4.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.4.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.40.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.40.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.41.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.41.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.42.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.42.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.43.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.43.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.44.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.44.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.45.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.45.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.46.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.46.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.47.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.47.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.48.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.48.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.49.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.49.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.5.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.5.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.50.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.50.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.51.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.51.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.52.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.52.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.53.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.53.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.54.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.54.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.55.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.55.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.56.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.56.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.57.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.57.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.58.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.58.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.59.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.59.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.6.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.6.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.60.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.60.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.61.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.61.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.62.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.62.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.63.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.63.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.7.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.7.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.8.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.8.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.9.down_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.experts.9.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.gate.e_score_correction_bias": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.gate.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.kv_a_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.kv_a_proj_with_mqa.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.kv_b_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.q_a_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.q_a_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.12.self_attn.q_b_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.input_layernorm.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.0.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.0.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.1.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.1.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.10.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.10.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.11.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.11.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.12.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.12.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.13.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.13.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.14.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.14.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.15.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.15.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.16.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.16.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.17.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.17.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.18.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.18.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.19.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.19.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.2.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.2.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.20.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.20.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.21.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.21.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.22.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.22.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.23.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.23.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.24.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.24.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.25.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.25.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.26.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.26.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.27.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.27.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.28.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.28.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.29.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.29.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.3.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.3.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.30.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.30.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.31.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.31.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.32.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.32.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.33.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.33.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.34.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.34.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.35.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.35.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.36.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.36.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.37.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.37.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.38.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.38.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.39.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.39.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.4.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.4.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.40.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.40.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.41.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.41.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.42.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.42.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.43.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.43.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.44.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.44.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.45.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.45.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.46.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.46.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.47.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.47.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.48.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.48.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.49.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.49.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.5.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.5.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.50.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.50.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.51.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.51.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.52.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.52.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.53.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.53.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.54.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.54.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.55.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.55.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.56.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.56.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.57.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.57.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.58.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.58.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.59.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.59.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.6.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.6.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.60.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.60.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.61.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.61.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.62.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.62.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.63.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.63.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.7.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.7.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.8.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.8.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.9.down_proj.weight": "model-00013-of-00048.safetensors", + "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.experts.9.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.gate.e_score_correction_bias": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.gate.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.kv_a_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.kv_a_proj_with_mqa.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.kv_b_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.q_a_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.q_a_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.13.self_attn.q_b_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.input_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.0.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.0.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.1.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.1.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.10.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.10.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.11.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.11.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.12.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.12.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.13.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.13.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.14.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.14.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.15.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.15.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.16.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.16.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.17.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.17.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.18.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.18.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.19.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.19.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.2.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.2.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.20.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.20.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.21.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.21.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.22.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.22.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.23.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.23.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.24.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.24.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.25.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.25.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.26.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.26.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.27.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.27.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.28.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.28.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.29.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.29.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.3.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.3.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.30.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.30.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.31.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.31.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.32.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.32.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.33.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.33.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.34.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.34.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.35.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.35.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.36.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.36.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.37.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.37.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.38.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.38.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.39.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.39.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.4.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.4.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.40.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.40.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.41.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.41.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.42.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.42.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.43.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.43.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.44.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.44.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.45.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.45.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.46.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.46.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.47.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.47.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.48.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.48.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.49.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.49.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.5.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.5.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.50.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.50.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.51.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.51.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.52.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.52.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.53.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.53.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.54.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.54.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.55.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.55.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.56.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.56.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.57.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.57.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.58.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.58.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.59.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.59.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.6.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.6.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.60.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.60.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.61.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.61.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.62.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.62.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.63.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.63.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.7.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.7.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.8.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.8.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.9.down_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.experts.9.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.gate.e_score_correction_bias": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.gate.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.kv_a_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.kv_a_proj_with_mqa.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.kv_b_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.q_a_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.q_a_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.14.self_attn.q_b_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.input_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.0.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.0.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.1.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.1.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.10.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.10.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.11.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.11.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.12.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.12.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.13.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.13.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.14.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.14.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.15.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.15.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.16.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.16.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.17.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.17.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.18.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.18.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.19.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.19.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.2.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.2.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.20.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.20.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.21.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.21.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.22.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.22.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.23.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.23.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.24.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.24.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.25.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.25.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.26.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.26.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.27.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.27.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.28.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.28.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.29.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.29.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.3.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.3.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.30.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.30.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.31.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.31.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.32.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.32.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.33.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.33.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.34.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.34.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.35.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.35.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.36.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.36.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.37.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.37.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.38.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.38.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.39.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.39.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.4.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.4.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.40.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.40.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.41.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.41.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.42.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.42.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.43.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.43.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.44.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.44.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.45.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.45.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.46.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.46.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.47.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.47.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.48.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.48.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.49.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.49.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.5.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.5.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.50.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.50.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.51.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.51.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.52.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.52.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.53.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.53.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.54.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.54.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.55.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.55.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.56.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.56.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.57.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.57.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.58.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.58.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.59.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.59.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.6.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.6.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.60.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.60.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.61.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.61.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.62.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.62.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.63.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.63.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.7.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.7.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.8.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.8.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.9.down_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.experts.9.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.gate.e_score_correction_bias": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.gate.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.kv_a_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.kv_a_proj_with_mqa.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.kv_b_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.q_a_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.q_a_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.15.self_attn.q_b_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.input_layernorm.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.0.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.0.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.1.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.1.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.10.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.10.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.11.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.11.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.12.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.12.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.13.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.13.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.14.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.14.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.15.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.15.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.16.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.16.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.17.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.17.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.18.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.18.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.19.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.19.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.2.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.2.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.20.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.20.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.21.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.21.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.22.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.22.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.23.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.23.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.24.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.24.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.25.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.25.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.26.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.26.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.27.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.27.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.28.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.28.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.29.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.29.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.3.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.3.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.30.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.30.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.31.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.31.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.32.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.32.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.33.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.33.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.34.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.34.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.35.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.35.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.36.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.36.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.37.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.37.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.38.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.38.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.39.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.39.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.4.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.4.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.40.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.40.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.41.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.41.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.42.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.42.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.43.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.43.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.44.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.44.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.45.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.45.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.46.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.46.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.47.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.47.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.48.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.48.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.49.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.49.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.5.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.5.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.50.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.50.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.51.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.51.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.52.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.52.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.53.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.53.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.54.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.54.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.55.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.55.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.56.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.56.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.57.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.57.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.58.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.58.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.59.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.59.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.6.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.6.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.60.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.60.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.61.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.61.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.62.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.62.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.63.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.63.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.7.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.7.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.8.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.8.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.9.down_proj.weight": "model-00016-of-00048.safetensors", + "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.experts.9.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.gate.e_score_correction_bias": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.gate.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.kv_a_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.kv_a_proj_with_mqa.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.kv_b_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.q_a_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.q_a_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.16.self_attn.q_b_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.input_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.0.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.0.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.1.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.1.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.10.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.10.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.11.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.11.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.12.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.12.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.13.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.13.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.14.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.14.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.15.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.15.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.16.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.16.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.17.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.17.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.18.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.18.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.19.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.19.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.2.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.2.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.20.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.20.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.21.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.21.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.22.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.22.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.23.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.23.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.24.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.24.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.25.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.25.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.26.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.26.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.27.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.27.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.28.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.28.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.29.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.29.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.3.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.3.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.30.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.30.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.31.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.31.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.32.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.32.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.33.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.33.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.34.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.34.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.35.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.35.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.36.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.36.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.37.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.37.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.38.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.38.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.39.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.39.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.4.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.4.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.40.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.40.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.41.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.41.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.42.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.42.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.43.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.43.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.44.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.44.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.45.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.45.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.46.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.46.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.47.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.47.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.48.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.48.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.49.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.49.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.5.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.5.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.50.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.50.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.51.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.51.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.52.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.52.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.53.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.53.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.54.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.54.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.55.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.55.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.56.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.56.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.57.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.57.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.58.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.58.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.59.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.59.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.6.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.6.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.60.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.60.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.61.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.61.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.62.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.62.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.63.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.63.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.7.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.7.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.8.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.8.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.9.down_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.experts.9.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.gate.e_score_correction_bias": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.gate.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.kv_a_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.kv_a_proj_with_mqa.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.kv_b_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.q_a_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.q_a_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.17.self_attn.q_b_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.input_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.0.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.0.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.1.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.1.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.10.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.10.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.11.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.11.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.12.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.12.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.13.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.13.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.14.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.14.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.15.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.15.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.16.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.16.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.17.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.17.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.18.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.18.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.19.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.19.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.2.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.2.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.20.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.20.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.21.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.21.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.22.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.22.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.23.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.23.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.24.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.24.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.25.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.25.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.26.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.26.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.27.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.27.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.28.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.28.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.29.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.29.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.3.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.3.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.30.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.30.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.31.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.31.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.32.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.32.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.33.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.33.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.34.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.34.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.35.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.35.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.36.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.36.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.37.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.37.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.38.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.38.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.39.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.39.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.4.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.4.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.40.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.40.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.41.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.41.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.42.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.42.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.43.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.43.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.44.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.44.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.45.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.45.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.46.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.46.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.47.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.47.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.48.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.48.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.49.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.49.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.5.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.5.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.50.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.50.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.51.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.51.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.52.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.52.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.53.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.53.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.54.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.54.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.55.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.55.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.56.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.56.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.57.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.57.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.58.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.58.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.59.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.59.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.6.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.6.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.60.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.60.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.61.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.61.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.62.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.62.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.63.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.63.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.7.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.7.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.8.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.8.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.9.down_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.experts.9.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.gate.e_score_correction_bias": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.gate.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.kv_a_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.kv_a_proj_with_mqa.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.kv_b_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.q_a_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.q_a_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.18.self_attn.q_b_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.input_layernorm.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.0.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.0.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.1.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.1.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.10.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.10.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.11.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.11.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.12.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.12.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.13.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.13.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.14.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.14.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.15.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.15.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.16.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.16.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.17.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.17.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.18.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.18.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.19.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.19.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.2.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.2.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.20.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.20.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.21.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.21.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.22.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.22.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.23.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.23.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.24.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.24.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.25.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.25.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.26.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.26.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.27.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.27.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.28.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.28.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.29.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.29.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.3.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.3.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.30.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.30.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.31.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.31.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.32.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.32.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.33.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.33.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.34.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.34.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.35.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.35.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.36.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.36.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.37.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.37.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.38.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.38.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.39.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.39.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.4.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.4.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.40.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.40.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.41.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.41.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.42.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.42.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.43.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.43.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.44.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.44.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.45.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.45.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.46.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.46.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.47.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.47.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.48.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.48.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.49.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.49.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.5.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.5.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.50.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.50.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.51.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.51.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.52.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.52.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.53.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.53.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.54.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.54.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.55.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.55.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.56.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.56.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.57.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.57.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.58.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.58.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.59.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.59.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.6.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.6.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.60.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.60.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.61.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.61.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.62.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.62.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.63.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.63.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.7.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.7.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.8.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.8.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.9.down_proj.weight": "model-00019-of-00048.safetensors", + "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.experts.9.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.gate.e_score_correction_bias": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.gate.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.kv_a_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.kv_a_proj_with_mqa.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.kv_b_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.q_a_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.q_a_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.19.self_attn.q_b_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.0.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.0.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.0.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.1.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.1.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.1.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.10.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.10.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.10.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.11.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.11.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.11.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.12.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.12.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.12.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.13.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.13.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.13.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.14.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.14.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.14.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.15.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.15.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.15.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.16.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.16.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.16.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.17.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.17.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.17.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.18.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.18.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.18.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.19.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.19.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.19.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.2.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.2.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.2.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.20.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.20.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.20.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.21.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.21.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.21.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.22.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.22.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.22.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.23.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.23.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.23.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.24.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.24.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.24.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.25.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.25.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.25.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.26.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.26.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.26.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.27.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.27.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.27.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.28.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.28.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.28.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.29.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.29.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.29.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.3.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.3.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.3.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.30.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.30.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.30.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.31.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.31.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.31.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.32.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.32.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.32.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.33.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.33.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.33.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.34.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.34.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.34.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.35.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.35.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.35.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.36.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.36.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.36.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.37.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.37.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.37.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.38.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.38.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.38.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.39.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.39.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.39.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.4.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.4.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.4.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.40.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.40.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.40.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.41.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.41.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.41.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.42.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.42.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.42.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.43.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.43.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.43.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.44.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.44.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.44.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.45.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.45.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.45.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.46.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.46.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.46.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.47.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.47.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.47.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.48.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.48.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.48.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.49.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.49.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.49.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.5.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.5.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.5.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.50.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.50.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.50.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.51.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.51.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.51.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.52.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.52.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.52.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.53.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.53.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.53.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.54.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.54.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.54.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.55.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.55.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.55.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.56.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.56.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.56.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.57.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.57.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.57.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.58.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.58.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.58.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.59.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.59.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.59.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.6.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.6.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.6.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.60.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.60.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.60.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.61.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.61.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.61.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.62.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.62.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.62.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.63.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.63.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.63.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.7.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.7.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.7.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.8.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.8.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.8.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.9.down_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.2.mlp.experts.9.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.experts.9.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.gate.e_score_correction_bias": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.gate.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.shared_experts.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.shared_experts.gate_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.mlp.shared_experts.up_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.kv_a_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.kv_a_proj_with_mqa.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.kv_b_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.q_a_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.q_a_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.2.self_attn.q_b_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.20.input_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.0.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.0.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.1.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.1.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.10.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.10.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.11.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.11.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.12.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.12.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.13.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.13.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.14.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.14.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.15.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.15.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.16.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.16.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.17.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.17.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.18.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.18.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.19.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.19.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.2.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.2.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.20.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.20.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.21.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.21.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.22.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.22.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.23.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.23.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.24.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.24.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.25.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.25.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.26.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.26.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.27.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.27.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.28.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.28.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.29.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.29.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.3.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.3.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.30.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.30.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.31.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.31.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.32.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.32.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.33.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.33.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.34.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.34.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.35.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.35.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.36.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.36.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.37.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.37.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.38.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.38.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.39.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.39.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.4.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.4.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.40.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.40.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.41.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.41.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.42.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.42.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.43.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.43.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.44.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.44.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.45.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.45.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.46.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.46.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.47.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.47.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.48.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.48.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.49.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.49.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.5.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.5.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.50.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.50.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.51.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.51.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.52.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.52.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.53.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.53.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.54.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.54.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.55.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.55.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.56.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.56.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.57.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.57.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.58.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.58.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.59.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.59.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.6.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.6.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.60.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.60.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.61.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.61.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.62.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.62.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.63.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.63.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.7.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.7.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.8.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.8.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.9.down_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.experts.9.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.gate.e_score_correction_bias": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.gate.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.kv_a_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.kv_a_proj_with_mqa.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.kv_b_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.q_a_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.q_a_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.20.self_attn.q_b_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.input_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.0.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.0.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.1.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.1.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.10.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.10.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.11.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.11.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.12.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.12.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.13.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.13.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.14.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.14.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.15.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.15.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.16.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.16.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.17.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.17.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.18.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.18.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.19.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.19.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.2.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.2.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.20.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.20.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.21.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.21.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.22.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.22.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.23.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.23.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.24.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.24.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.25.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.25.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.26.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.26.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.27.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.27.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.28.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.28.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.29.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.29.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.3.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.3.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.30.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.30.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.31.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.31.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.32.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.32.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.33.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.33.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.34.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.34.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.35.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.35.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.36.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.36.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.37.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.37.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.38.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.38.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.39.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.39.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.4.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.4.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.40.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.40.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.41.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.41.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.42.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.42.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.43.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.43.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.44.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.44.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.45.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.45.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.46.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.46.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.47.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.47.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.48.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.48.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.49.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.49.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.5.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.5.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.50.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.50.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.51.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.51.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.52.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.52.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.53.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.53.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.54.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.54.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.55.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.55.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.56.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.56.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.57.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.57.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.58.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.58.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.59.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.59.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.6.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.6.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.60.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.60.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.61.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.61.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.62.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.62.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.63.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.63.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.7.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.7.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.8.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.8.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.9.down_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.experts.9.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.gate.e_score_correction_bias": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.gate.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.kv_a_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.kv_a_proj_with_mqa.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.kv_b_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.q_a_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.q_a_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.21.self_attn.q_b_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.input_layernorm.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.0.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.0.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.1.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.1.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.10.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.10.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.11.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.11.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.12.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.12.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.13.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.13.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.14.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.14.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.15.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.15.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.16.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.16.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.17.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.17.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.18.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.18.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.19.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.19.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.2.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.2.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.20.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.20.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.21.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.21.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.22.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.22.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.23.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.23.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.24.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.24.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.25.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.25.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.26.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.26.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.27.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.27.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.28.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.28.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.29.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.29.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.3.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.3.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.30.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.30.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.31.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.31.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.32.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.32.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.33.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.33.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.34.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.34.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.35.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.35.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.36.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.36.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.37.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.37.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.38.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.38.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.39.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.39.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.4.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.4.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.40.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.40.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.41.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.41.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.42.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.42.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.43.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.43.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.44.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.44.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.45.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.45.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.46.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.46.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.47.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.47.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.48.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.48.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.49.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.49.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.5.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.5.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.50.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.50.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.51.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.51.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.52.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.52.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.53.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.53.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.54.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.54.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.55.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.55.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.56.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.56.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.57.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.57.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.58.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.58.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.59.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.59.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.6.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.6.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.60.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.60.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.61.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.61.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.62.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.62.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.63.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.63.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.7.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.7.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.8.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.8.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.9.down_proj.weight": "model-00022-of-00048.safetensors", + "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.experts.9.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.gate.e_score_correction_bias": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.gate.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.kv_a_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.kv_a_proj_with_mqa.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.kv_b_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.q_a_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.q_a_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.22.self_attn.q_b_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.input_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.0.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.0.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.1.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.1.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.10.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.10.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.11.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.11.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.12.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.12.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.13.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.13.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.14.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.14.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.15.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.15.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.16.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.16.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.17.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.17.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.18.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.18.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.19.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.19.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.2.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.2.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.20.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.20.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.21.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.21.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.22.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.22.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.23.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.23.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.24.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.24.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.25.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.25.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.26.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.26.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.27.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.27.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.28.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.28.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.29.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.29.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.3.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.3.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.30.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.30.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.31.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.31.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.32.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.32.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.33.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.33.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.34.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.34.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.35.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.35.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.36.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.36.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.37.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.37.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.38.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.38.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.39.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.39.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.4.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.4.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.40.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.40.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.41.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.41.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.42.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.42.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.43.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.43.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.44.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.44.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.45.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.45.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.46.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.46.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.47.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.47.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.48.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.48.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.49.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.49.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.5.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.5.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.50.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.50.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.51.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.51.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.52.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.52.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.53.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.53.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.54.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.54.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.55.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.55.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.56.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.56.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.57.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.57.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.58.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.58.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.59.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.59.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.6.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.6.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.60.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.60.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.61.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.61.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.62.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.62.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.63.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.63.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.7.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.7.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.8.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.8.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.9.down_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.experts.9.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.gate.e_score_correction_bias": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.gate.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.kv_a_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.kv_a_proj_with_mqa.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.kv_b_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.q_a_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.q_a_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.23.self_attn.q_b_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.input_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.0.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.0.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.1.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.1.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.10.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.10.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.11.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.11.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.12.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.12.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.13.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.13.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.14.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.14.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.15.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.15.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.16.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.16.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.17.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.17.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.18.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.18.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.19.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.19.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.2.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.2.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.20.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.20.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.21.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.21.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.22.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.22.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.23.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.23.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.24.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.24.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.25.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.25.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.26.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.26.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.27.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.27.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.28.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.28.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.29.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.29.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.3.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.3.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.30.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.30.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.31.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.31.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.32.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.32.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.33.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.33.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.34.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.34.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.35.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.35.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.36.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.36.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.37.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.37.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.38.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.38.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.39.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.39.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.4.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.4.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.40.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.40.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.41.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.41.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.42.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.42.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.43.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.43.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.44.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.44.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.45.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.45.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.46.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.46.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.47.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.47.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.48.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.48.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.49.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.49.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.5.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.5.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.50.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.50.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.51.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.51.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.52.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.52.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.53.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.53.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.54.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.54.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.55.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.55.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.56.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.56.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.57.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.57.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.58.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.58.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.59.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.59.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.6.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.6.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.60.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.60.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.61.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.61.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.62.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.62.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.63.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.63.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.7.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.7.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.8.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.8.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.9.down_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.experts.9.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.gate.e_score_correction_bias": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.gate.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.kv_a_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.kv_a_proj_with_mqa.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.kv_b_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.q_a_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.q_a_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.24.self_attn.q_b_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.input_layernorm.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.0.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.0.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.1.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.1.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.10.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.10.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.11.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.11.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.12.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.12.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.13.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.13.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.14.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.14.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.15.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.15.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.16.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.16.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.17.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.17.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.18.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.18.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.19.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.19.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.2.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.2.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.20.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.20.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.21.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.21.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.22.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.22.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.23.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.23.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.24.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.24.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.25.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.25.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.26.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.26.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.27.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.27.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.28.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.28.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.29.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.29.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.3.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.3.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.30.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.30.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.31.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.31.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.32.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.32.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.33.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.33.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.34.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.34.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.35.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.35.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.36.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.36.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.37.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.37.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.38.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.38.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.39.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.39.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.4.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.4.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.40.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.40.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.41.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.41.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.42.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.42.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.43.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.43.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.44.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.44.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.45.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.45.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.46.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.46.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.47.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.47.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.48.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.48.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.49.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.49.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.5.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.5.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.50.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.50.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.51.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.51.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.52.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.52.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.53.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.53.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.54.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.54.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.55.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.55.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.56.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.56.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.57.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.57.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.58.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.58.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.59.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.59.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.6.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.6.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.60.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.60.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.61.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.61.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.62.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.62.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.63.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.63.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.7.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.7.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.8.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.8.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.9.down_proj.weight": "model-00025-of-00048.safetensors", + "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.experts.9.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.gate.e_score_correction_bias": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.gate.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.kv_a_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.kv_a_proj_with_mqa.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.kv_b_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.q_a_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.q_a_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.25.self_attn.q_b_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.input_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.0.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.0.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.1.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.1.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.10.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.10.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.11.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.11.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.12.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.12.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.13.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.13.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.14.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.14.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.15.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.15.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.16.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.16.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.17.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.17.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.18.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.18.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.19.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.19.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.2.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.2.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.20.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.20.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.21.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.21.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.22.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.22.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.23.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.23.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.24.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.24.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.25.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.25.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.26.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.26.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.27.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.27.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.28.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.28.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.29.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.29.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.3.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.3.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.30.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.30.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.31.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.31.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.32.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.32.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.33.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.33.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.34.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.34.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.35.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.35.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.36.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.36.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.37.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.37.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.38.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.38.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.39.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.39.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.4.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.4.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.40.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.40.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.41.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.41.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.42.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.42.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.43.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.43.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.44.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.44.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.45.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.45.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.46.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.46.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.47.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.47.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.48.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.48.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.49.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.49.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.5.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.5.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.50.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.50.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.51.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.51.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.52.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.52.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.53.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.53.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.54.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.54.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.55.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.55.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.56.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.56.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.57.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.57.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.58.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.58.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.59.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.59.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.6.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.6.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.60.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.60.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.61.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.61.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.62.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.62.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.63.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.63.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.7.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.7.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.8.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.8.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.9.down_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.experts.9.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.gate.e_score_correction_bias": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.gate.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.kv_a_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.kv_a_proj_with_mqa.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.kv_b_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.q_a_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.q_a_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.26.self_attn.q_b_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.input_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.0.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.0.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.1.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.1.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.10.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.10.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.11.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.11.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.12.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.12.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.13.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.13.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.14.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.14.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.15.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.15.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.16.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.16.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.17.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.17.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.18.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.18.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.19.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.19.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.2.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.2.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.20.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.20.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.21.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.21.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.22.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.22.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.23.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.23.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.24.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.24.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.25.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.25.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.26.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.26.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.27.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.27.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.28.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.28.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.29.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.29.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.3.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.3.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.30.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.30.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.31.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.31.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.32.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.32.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.33.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.33.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.34.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.34.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.35.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.35.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.36.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.36.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.37.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.37.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.38.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.38.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.39.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.39.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.4.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.4.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.40.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.40.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.41.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.41.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.42.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.42.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.43.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.43.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.44.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.44.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.45.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.45.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.46.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.46.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.47.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.47.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.48.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.48.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.49.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.49.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.5.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.5.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.50.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.50.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.51.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.51.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.52.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.52.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.53.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.53.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.54.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.54.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.55.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.55.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.56.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.56.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.57.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.57.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.58.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.58.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.59.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.59.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.6.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.6.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.60.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.60.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.61.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.61.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.62.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.62.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.63.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.63.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.7.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.7.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.8.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.8.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.9.down_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.experts.9.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.gate.e_score_correction_bias": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.gate.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.kv_a_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.kv_a_proj_with_mqa.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.kv_b_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.q_a_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.q_a_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.27.self_attn.q_b_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.input_layernorm.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.0.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.0.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.1.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.1.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.10.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.10.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.11.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.11.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.12.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.12.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.13.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.13.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.14.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.14.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.15.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.15.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.16.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.16.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.16.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.17.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.17.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.17.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.18.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.18.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.18.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.19.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.19.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.19.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.2.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.2.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.20.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.20.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.20.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.21.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.21.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.21.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.22.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.22.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.22.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.23.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.23.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.23.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.24.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.24.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.24.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.25.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.25.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.25.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.26.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.26.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.26.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.27.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.27.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.27.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.28.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.28.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.28.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.29.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.29.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.29.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.3.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.3.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.3.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.30.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.30.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.30.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.31.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.31.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.31.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.32.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.32.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.32.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.33.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.33.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.33.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.34.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.34.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.34.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.35.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.35.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.35.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.36.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.36.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.36.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.37.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.37.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.37.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.38.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.38.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.38.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.39.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.39.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.39.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.4.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.4.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.40.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.40.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.40.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.41.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.41.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.41.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.42.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.42.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.42.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.43.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.43.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.43.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.44.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.44.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.44.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.45.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.45.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.45.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.46.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.46.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.46.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.47.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.47.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.47.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.48.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.48.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.48.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.49.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.49.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.49.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.5.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.5.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.50.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.50.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.50.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.51.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.51.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.51.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.52.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.52.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.52.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.53.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.53.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.53.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.54.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.54.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.54.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.55.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.55.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.55.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.56.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.56.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.56.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.57.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.57.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.57.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.58.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.58.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.58.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.59.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.59.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.59.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.6.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.6.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.60.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.60.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.60.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.61.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.61.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.61.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.62.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.62.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.62.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.63.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.63.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.63.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.7.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.7.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.8.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.8.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.9.down_proj.weight": "model-00028-of-00048.safetensors", + "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.experts.9.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.gate.e_score_correction_bias": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.gate.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.kv_a_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.kv_a_proj_with_mqa.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.kv_b_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.q_a_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.q_a_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.28.self_attn.q_b_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.input_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.0.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.0.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.1.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.1.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.10.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.10.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.11.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.11.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.12.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.12.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.13.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.13.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.14.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.14.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.15.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.15.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.16.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.16.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.16.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.17.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.17.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.17.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.18.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.18.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.18.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.19.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.19.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.19.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.2.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.2.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.20.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.20.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.20.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.21.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.21.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.21.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.22.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.22.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.22.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.23.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.23.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.23.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.24.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.24.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.24.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.25.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.25.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.25.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.26.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.26.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.26.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.27.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.27.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.27.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.28.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.28.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.28.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.29.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.29.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.29.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.3.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.3.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.30.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.30.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.30.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.31.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.31.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.31.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.32.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.32.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.32.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.33.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.33.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.33.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.34.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.34.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.34.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.35.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.35.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.35.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.36.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.36.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.36.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.37.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.37.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.37.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.38.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.38.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.38.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.39.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.39.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.39.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.4.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.4.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.40.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.40.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.40.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.41.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.41.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.41.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.42.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.42.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.42.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.43.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.43.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.43.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.44.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.44.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.44.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.45.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.45.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.45.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.46.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.46.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.46.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.47.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.47.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.47.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.48.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.48.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.48.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.49.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.49.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.49.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.5.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.5.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.50.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.50.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.50.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.51.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.51.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.51.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.52.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.52.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.52.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.53.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.53.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.53.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.54.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.54.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.54.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.55.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.55.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.55.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.56.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.56.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.56.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.57.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.57.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.57.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.58.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.58.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.58.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.59.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.59.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.59.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.6.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.6.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.60.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.60.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.60.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.61.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.61.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.61.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.62.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.62.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.62.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.63.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.63.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.63.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.7.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.7.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.7.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.8.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.8.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.9.down_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.experts.9.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.gate.e_score_correction_bias": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.gate.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.kv_a_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.kv_a_proj_with_mqa.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.kv_b_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.q_a_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.q_a_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.29.self_attn.q_b_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.3.input_layernorm.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.0.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.0.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.1.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.1.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.10.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.10.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.11.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.11.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.12.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.12.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.13.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.13.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.14.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.14.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.15.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.15.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.16.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.16.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.17.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.17.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.18.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.18.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.19.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.19.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.2.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.2.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.20.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.20.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.21.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.21.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.22.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.22.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.23.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.23.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.24.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.24.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.25.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.25.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.26.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.26.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.26.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.27.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.27.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.28.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.28.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.29.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.29.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.3.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.3.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.30.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.30.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.31.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.31.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.32.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.32.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.33.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.33.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.34.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.34.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.35.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.35.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.36.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.36.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.37.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.37.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.38.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.38.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.39.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.39.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.4.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.4.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.40.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.40.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.41.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.41.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.41.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.42.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.42.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.43.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.43.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.44.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.44.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.45.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.45.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.46.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.46.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.47.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.47.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.48.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.48.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.49.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.49.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.5.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.5.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.50.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.50.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.51.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.51.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.52.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.52.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.53.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.53.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.54.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.54.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.55.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.55.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.56.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.56.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.56.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.57.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.57.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.58.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.58.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.59.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.59.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.6.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.6.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.60.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.60.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.61.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.61.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.62.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.62.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.63.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.63.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.7.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.7.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.8.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.8.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.9.down_proj.weight": "model-00003-of-00048.safetensors", + "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.experts.9.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.gate.e_score_correction_bias": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.gate.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.kv_a_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.kv_a_proj_with_mqa.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.kv_b_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.q_a_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.q_a_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.3.self_attn.q_b_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.30.input_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.0.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.0.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.1.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.1.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.10.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.10.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.11.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.11.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.12.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.12.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.13.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.13.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.14.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.14.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.15.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.15.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.16.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.16.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.16.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.17.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.17.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.17.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.18.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.18.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.18.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.19.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.19.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.19.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.2.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.2.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.20.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.20.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.20.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.21.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.21.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.21.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.22.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.22.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.22.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.23.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.23.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.23.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.24.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.24.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.24.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.25.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.25.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.25.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.26.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.26.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.26.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.27.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.27.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.27.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.28.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.28.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.28.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.29.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.29.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.29.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.3.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.3.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.30.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.30.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.30.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.31.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.31.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.31.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.32.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.32.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.32.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.33.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.33.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.33.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.34.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.34.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.34.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.35.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.35.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.35.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.36.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.36.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.36.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.37.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.37.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.37.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.38.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.38.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.38.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.39.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.39.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.39.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.4.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.4.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.40.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.40.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.40.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.41.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.41.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.41.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.42.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.42.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.42.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.43.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.43.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.43.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.44.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.44.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.44.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.45.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.45.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.45.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.46.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.46.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.46.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.47.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.47.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.47.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.48.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.48.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.48.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.49.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.49.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.49.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.5.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.5.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.50.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.50.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.50.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.51.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.51.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.51.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.52.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.52.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.52.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.53.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.53.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.53.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.54.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.54.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.54.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.55.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.55.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.55.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.56.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.56.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.56.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.57.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.57.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.57.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.58.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.58.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.58.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.59.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.59.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.59.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.6.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.6.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.60.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.60.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.60.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.61.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.61.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.61.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.62.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.62.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.62.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.63.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.63.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.63.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.7.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.7.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.8.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.8.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.9.down_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.experts.9.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.gate.e_score_correction_bias": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.gate.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.kv_a_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.kv_a_proj_with_mqa.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.kv_b_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.q_a_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.q_a_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.30.self_attn.q_b_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.input_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.0.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.0.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.1.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.1.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.10.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.10.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.11.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.11.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.12.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.12.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.13.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.13.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.14.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.14.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.15.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.15.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.16.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.16.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.16.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.17.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.17.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.17.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.18.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.18.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.18.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.19.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.19.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.19.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.2.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.2.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.20.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.20.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.20.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.21.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.21.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.21.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.22.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.22.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.22.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.23.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.23.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.23.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.24.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.24.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.24.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.25.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.25.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.25.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.26.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.26.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.26.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.27.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.27.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.27.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.28.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.28.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.28.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.29.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.29.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.29.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.3.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.3.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.30.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.30.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.30.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.31.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.31.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.31.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.32.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.32.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.32.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.33.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.33.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.33.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.34.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.34.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.34.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.35.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.35.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.35.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.36.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.36.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.36.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.37.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.37.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.37.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.38.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.38.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.38.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.39.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.39.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.39.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.4.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.4.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.40.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.40.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.40.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.41.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.41.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.41.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.42.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.42.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.42.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.43.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.43.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.43.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.44.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.44.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.44.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.45.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.45.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.45.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.46.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.46.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.46.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.47.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.47.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.47.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.48.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.48.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.48.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.49.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.49.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.49.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.5.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.5.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.50.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.50.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.50.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.51.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.51.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.51.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.52.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.52.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.52.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.53.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.53.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.53.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.54.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.54.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.54.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.55.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.55.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.55.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.56.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.56.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.56.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.57.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.57.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.57.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.58.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.58.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.58.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.59.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.59.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.59.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.6.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.6.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.60.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.60.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.60.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.61.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.61.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.61.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.62.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.62.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.62.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.63.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.63.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.63.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.7.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.7.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.7.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.8.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.8.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.9.down_proj.weight": "model-00031-of-00048.safetensors", + "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.experts.9.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.gate.e_score_correction_bias": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.gate.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.kv_a_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.kv_a_proj_with_mqa.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.kv_b_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.q_a_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.q_a_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.31.self_attn.q_b_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.input_layernorm.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.0.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.0.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.1.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.1.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.10.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.10.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.11.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.11.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.11.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.12.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.12.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.13.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.13.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.14.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.14.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.15.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.15.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.16.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.16.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.16.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.17.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.17.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.17.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.18.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.18.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.18.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.19.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.19.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.19.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.2.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.2.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.20.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.20.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.20.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.21.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.21.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.21.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.22.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.22.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.22.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.23.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.23.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.23.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.24.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.24.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.24.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.25.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.25.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.25.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.26.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.26.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.26.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.27.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.27.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.27.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.28.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.28.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.28.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.29.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.29.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.29.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.3.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.3.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.30.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.30.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.30.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.31.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.31.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.31.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.32.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.32.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.32.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.33.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.33.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.33.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.34.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.34.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.34.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.35.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.35.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.35.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.36.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.36.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.36.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.37.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.37.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.37.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.38.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.38.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.38.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.39.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.39.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.39.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.4.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.4.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.40.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.40.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.40.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.41.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.41.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.41.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.42.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.42.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.42.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.43.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.43.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.43.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.44.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.44.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.44.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.45.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.45.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.45.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.46.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.46.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.46.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.47.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.47.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.47.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.48.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.48.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.48.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.49.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.49.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.49.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.5.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.5.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.50.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.50.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.50.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.51.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.51.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.51.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.52.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.52.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.52.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.53.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.53.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.53.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.54.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.54.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.54.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.55.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.55.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.55.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.56.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.56.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.56.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.57.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.57.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.57.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.58.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.58.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.58.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.59.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.59.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.59.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.6.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.6.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.60.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.60.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.60.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.61.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.61.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.61.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.62.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.62.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.62.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.63.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.63.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.63.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.7.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.7.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.8.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.8.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.9.down_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.experts.9.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.gate.e_score_correction_bias": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.gate.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.kv_a_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.kv_a_proj_with_mqa.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.kv_b_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.q_a_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.q_a_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.32.self_attn.q_b_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.input_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.0.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.0.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.1.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.1.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.10.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.10.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.11.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.11.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.12.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.12.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.13.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.13.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.14.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.14.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.15.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.15.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.16.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.16.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.16.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.17.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.17.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.17.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.18.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.18.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.18.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.19.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.19.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.19.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.2.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.2.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.20.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.20.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.20.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.21.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.21.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.21.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.22.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.22.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.22.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.23.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.23.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.23.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.24.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.24.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.24.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.25.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.25.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.25.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.26.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.26.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.26.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.27.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.27.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.27.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.28.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.28.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.28.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.29.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.29.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.29.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.3.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.3.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.30.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.30.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.30.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.31.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.31.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.31.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.32.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.32.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.32.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.33.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.33.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.33.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.34.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.34.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.34.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.35.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.35.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.35.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.36.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.36.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.36.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.37.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.37.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.37.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.38.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.38.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.38.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.39.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.39.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.39.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.4.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.4.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.40.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.40.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.40.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.41.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.41.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.41.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.42.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.42.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.42.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.43.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.43.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.43.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.44.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.44.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.44.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.45.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.45.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.45.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.46.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.46.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.46.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.47.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.47.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.47.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.48.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.48.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.48.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.49.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.49.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.49.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.5.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.5.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.50.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.50.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.50.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.51.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.51.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.51.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.52.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.52.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.52.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.53.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.53.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.53.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.54.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.54.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.54.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.55.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.55.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.55.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.56.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.56.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.56.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.57.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.57.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.57.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.58.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.58.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.58.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.59.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.59.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.59.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.6.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.6.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.60.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.60.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.60.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.61.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.61.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.61.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.62.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.62.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.62.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.63.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.63.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.63.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.7.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.7.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.7.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.8.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.8.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.9.down_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.experts.9.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.gate.e_score_correction_bias": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.gate.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.kv_a_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.kv_a_proj_with_mqa.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.kv_b_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.q_a_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.q_a_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.33.self_attn.q_b_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.input_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.0.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.0.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.1.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.1.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.10.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.10.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.11.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.11.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.11.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.12.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.12.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.13.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.13.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.14.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.14.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.15.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.15.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.16.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.16.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.16.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.17.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.17.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.17.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.18.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.18.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.18.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.19.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.19.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.19.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.2.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.2.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.20.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.20.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.20.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.21.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.21.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.21.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.22.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.22.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.22.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.23.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.23.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.23.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.24.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.24.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.24.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.25.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.25.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.25.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.26.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.26.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.26.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.27.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.27.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.27.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.28.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.28.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.28.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.29.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.29.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.29.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.3.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.3.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.30.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.30.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.30.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.31.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.31.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.31.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.32.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.32.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.32.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.33.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.33.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.33.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.34.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.34.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.34.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.35.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.35.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.35.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.36.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.36.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.36.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.37.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.37.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.37.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.38.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.38.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.38.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.39.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.39.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.39.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.4.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.4.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.40.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.40.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.40.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.41.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.41.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.41.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.42.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.42.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.42.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.43.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.43.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.43.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.44.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.44.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.44.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.45.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.45.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.45.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.46.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.46.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.46.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.47.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.47.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.47.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.48.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.48.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.48.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.49.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.49.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.49.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.5.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.5.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.50.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.50.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.50.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.51.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.51.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.51.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.52.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.52.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.52.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.53.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.53.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.53.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.54.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.54.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.54.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.55.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.55.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.55.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.56.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.56.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.56.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.57.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.57.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.57.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.58.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.58.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.58.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.59.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.59.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.59.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.6.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.6.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.60.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.60.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.60.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.61.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.61.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.61.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.62.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.62.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.62.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.63.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.63.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.63.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.7.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.7.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.8.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.8.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.9.down_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.experts.9.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.gate.e_score_correction_bias": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.gate.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.kv_a_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.kv_a_proj_with_mqa.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.kv_b_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.q_a_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.q_a_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.34.self_attn.q_b_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.input_layernorm.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.0.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.0.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.1.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.1.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.10.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.10.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.11.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.11.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.12.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.12.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.13.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.13.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.14.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.14.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.15.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.15.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.16.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.16.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.16.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.17.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.17.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.17.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.18.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.18.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.18.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.19.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.19.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.19.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.2.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.2.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.20.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.20.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.20.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.21.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.21.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.21.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.22.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.22.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.22.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.23.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.23.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.23.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.24.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.24.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.24.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.25.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.25.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.25.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.26.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.26.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.26.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.27.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.27.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.27.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.28.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.28.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.28.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.29.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.29.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.29.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.3.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.3.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.30.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.30.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.30.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.31.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.31.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.31.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.32.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.32.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.32.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.33.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.33.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.33.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.34.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.34.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.34.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.35.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.35.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.35.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.36.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.36.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.36.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.37.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.37.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.37.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.38.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.38.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.38.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.39.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.39.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.39.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.4.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.4.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.40.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.40.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.40.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.41.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.41.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.41.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.42.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.42.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.42.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.43.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.43.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.43.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.44.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.44.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.44.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.45.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.45.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.45.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.46.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.46.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.46.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.47.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.47.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.47.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.48.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.48.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.48.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.49.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.49.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.49.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.5.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.5.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.50.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.50.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.50.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.51.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.51.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.51.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.52.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.52.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.52.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.53.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.53.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.53.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.54.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.54.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.54.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.55.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.55.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.55.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.56.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.56.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.56.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.57.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.57.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.57.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.58.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.58.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.58.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.59.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.59.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.59.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.6.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.6.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.60.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.60.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.60.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.61.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.61.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.61.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.62.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.62.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.62.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.63.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.63.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.63.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.7.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.7.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.7.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.8.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.8.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.9.down_proj.weight": "model-00035-of-00048.safetensors", + "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.experts.9.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.gate.e_score_correction_bias": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.gate.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.kv_a_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.kv_a_proj_with_mqa.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.kv_b_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.q_a_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.q_a_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.35.self_attn.q_b_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.input_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.0.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.0.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.1.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.1.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.10.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.10.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.11.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.11.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.11.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.12.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.12.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.13.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.13.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.14.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.14.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.15.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.15.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.16.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.16.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.16.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.17.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.17.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.17.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.18.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.18.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.18.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.19.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.19.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.19.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.2.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.2.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.20.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.20.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.20.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.21.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.21.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.21.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.22.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.22.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.22.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.23.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.23.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.23.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.24.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.24.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.24.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.25.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.25.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.25.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.26.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.26.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.26.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.27.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.27.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.27.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.28.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.28.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.28.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.29.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.29.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.29.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.3.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.3.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.30.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.30.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.30.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.31.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.31.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.31.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.32.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.32.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.32.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.33.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.33.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.33.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.34.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.34.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.34.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.35.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.35.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.35.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.36.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.36.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.36.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.37.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.37.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.37.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.38.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.38.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.38.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.39.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.39.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.39.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.4.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.4.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.40.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.40.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.40.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.41.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.41.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.41.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.42.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.42.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.42.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.43.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.43.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.43.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.44.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.44.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.44.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.45.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.45.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.45.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.46.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.46.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.46.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.47.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.47.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.47.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.48.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.48.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.48.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.49.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.49.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.49.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.5.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.5.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.50.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.50.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.50.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.51.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.51.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.51.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.52.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.52.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.52.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.53.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.53.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.53.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.54.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.54.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.54.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.55.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.55.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.55.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.56.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.56.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.56.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.57.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.57.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.57.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.58.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.58.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.58.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.59.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.59.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.59.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.6.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.6.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.60.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.60.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.60.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.61.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.61.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.61.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.62.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.62.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.62.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.63.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.63.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.63.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.7.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.7.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.8.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.8.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.9.down_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.experts.9.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.gate.e_score_correction_bias": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.gate.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.kv_a_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.kv_a_proj_with_mqa.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.kv_b_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.q_a_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.q_a_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.36.self_attn.q_b_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.input_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.0.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.0.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.1.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.1.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.10.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.10.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.11.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.11.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.12.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.12.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.13.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.13.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.14.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.14.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.15.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.15.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.16.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.16.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.16.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.17.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.17.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.17.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.18.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.18.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.18.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.19.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.19.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.19.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.2.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.2.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.20.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.20.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.20.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.21.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.21.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.21.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.22.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.22.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.22.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.23.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.23.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.23.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.24.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.24.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.24.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.25.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.25.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.25.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.26.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.26.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.26.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.27.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.27.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.27.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.28.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.28.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.28.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.29.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.29.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.29.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.3.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.3.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.30.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.30.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.30.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.31.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.31.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.31.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.32.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.32.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.32.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.33.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.33.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.33.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.34.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.34.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.34.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.35.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.35.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.35.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.36.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.36.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.36.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.37.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.37.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.37.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.38.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.38.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.38.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.39.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.39.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.39.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.4.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.4.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.40.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.40.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.40.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.41.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.41.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.41.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.42.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.42.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.42.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.43.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.43.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.43.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.44.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.44.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.44.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.45.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.45.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.45.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.46.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.46.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.46.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.47.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.47.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.47.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.48.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.48.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.48.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.49.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.49.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.49.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.5.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.5.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.50.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.50.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.50.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.51.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.51.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.51.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.52.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.52.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.52.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.53.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.53.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.53.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.54.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.54.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.54.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.55.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.55.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.55.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.56.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.56.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.56.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.57.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.57.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.57.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.58.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.58.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.58.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.59.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.59.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.59.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.6.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.6.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.60.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.60.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.60.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.61.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.61.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.61.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.62.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.62.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.62.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.63.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.63.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.63.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.7.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.7.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.7.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.8.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.8.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.9.down_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.experts.9.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.gate.e_score_correction_bias": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.gate.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.kv_a_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.kv_a_proj_with_mqa.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.kv_b_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.q_a_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.q_a_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.37.self_attn.q_b_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.input_layernorm.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.0.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.0.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.1.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.1.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.10.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.10.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.11.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.11.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.11.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.12.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.12.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.13.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.13.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.14.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.14.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.15.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.15.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.16.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.16.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.16.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.17.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.17.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.17.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.18.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.18.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.18.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.19.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.19.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.19.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.2.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.2.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.20.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.20.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.20.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.21.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.21.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.21.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.22.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.22.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.22.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.23.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.23.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.23.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.24.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.24.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.24.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.25.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.25.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.25.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.26.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.26.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.26.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.27.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.27.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.27.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.28.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.28.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.28.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.29.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.29.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.29.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.3.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.3.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.30.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.30.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.30.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.31.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.31.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.31.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.32.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.32.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.32.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.33.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.33.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.33.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.34.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.34.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.34.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.35.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.35.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.35.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.36.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.36.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.36.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.37.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.37.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.37.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.38.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.38.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.38.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.39.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.39.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.39.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.4.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.4.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.40.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.40.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.40.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.41.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.41.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.41.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.42.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.42.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.42.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.43.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.43.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.43.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.44.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.44.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.44.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.45.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.45.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.45.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.46.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.46.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.46.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.47.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.47.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.47.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.48.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.48.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.48.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.49.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.49.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.49.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.5.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.5.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.50.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.50.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.50.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.51.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.51.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.51.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.52.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.52.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.52.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.53.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.53.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.53.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.54.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.54.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.54.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.55.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.55.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.55.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.56.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.56.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.56.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.57.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.57.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.57.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.58.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.58.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.58.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.59.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.59.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.59.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.6.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.6.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.60.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.60.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.60.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.61.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.61.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.61.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.62.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.62.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.62.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.63.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.63.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.63.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.7.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.7.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.8.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.8.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.9.down_proj.weight": "model-00038-of-00048.safetensors", + "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.experts.9.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.gate.e_score_correction_bias": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.gate.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.kv_a_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.kv_a_proj_with_mqa.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.kv_b_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.q_a_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.q_a_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.38.self_attn.q_b_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.input_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.0.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.0.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.1.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.1.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.10.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.10.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.11.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.11.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.12.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.12.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.13.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.13.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.14.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.14.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.15.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.15.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.16.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.16.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.16.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.17.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.17.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.17.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.18.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.18.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.18.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.19.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.19.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.19.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.2.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.2.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.20.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.20.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.20.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.21.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.21.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.21.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.22.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.22.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.22.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.23.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.23.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.23.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.24.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.24.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.24.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.25.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.25.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.25.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.26.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.26.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.26.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.27.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.27.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.27.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.28.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.28.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.28.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.29.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.29.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.29.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.3.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.3.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.30.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.30.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.30.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.31.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.31.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.31.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.32.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.32.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.32.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.33.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.33.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.33.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.34.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.34.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.34.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.35.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.35.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.35.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.36.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.36.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.36.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.37.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.37.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.37.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.38.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.38.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.38.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.39.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.39.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.39.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.4.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.4.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.40.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.40.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.40.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.41.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.41.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.41.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.42.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.42.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.42.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.43.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.43.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.43.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.44.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.44.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.44.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.45.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.45.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.45.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.46.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.46.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.46.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.47.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.47.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.47.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.48.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.48.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.48.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.49.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.49.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.49.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.5.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.5.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.50.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.50.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.50.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.51.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.51.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.51.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.52.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.52.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.52.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.53.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.53.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.53.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.54.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.54.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.54.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.55.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.55.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.55.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.56.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.56.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.56.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.57.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.57.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.57.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.58.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.58.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.58.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.59.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.59.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.59.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.6.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.6.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.60.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.60.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.60.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.61.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.61.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.61.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.62.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.62.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.62.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.63.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.63.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.63.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.7.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.7.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.7.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.8.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.8.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.9.down_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.experts.9.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.gate.e_score_correction_bias": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.gate.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.kv_a_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.kv_a_proj_with_mqa.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.kv_b_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.q_a_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.q_a_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.39.self_attn.q_b_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.4.input_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.14.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.29.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.44.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.59.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.7.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.gate.e_score_correction_bias": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.gate.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.kv_a_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.kv_a_proj_with_mqa.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.kv_b_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.q_a_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.q_a_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.4.self_attn.q_b_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.40.input_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.0.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.0.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.1.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.1.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.10.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.10.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.11.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.11.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.11.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.12.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.12.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.13.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.13.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.14.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.14.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.15.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.15.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.16.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.16.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.16.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.17.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.17.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.17.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.18.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.18.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.18.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.19.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.19.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.19.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.2.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.2.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.20.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.20.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.20.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.21.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.21.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.21.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.22.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.22.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.22.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.23.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.23.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.23.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.24.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.24.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.24.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.25.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.25.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.25.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.26.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.26.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.26.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.27.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.27.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.27.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.28.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.28.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.28.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.29.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.29.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.29.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.3.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.3.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.30.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.30.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.30.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.31.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.31.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.31.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.32.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.32.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.32.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.33.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.33.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.33.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.34.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.34.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.34.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.35.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.35.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.35.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.36.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.36.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.36.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.37.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.37.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.37.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.38.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.38.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.38.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.39.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.39.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.39.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.4.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.4.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.40.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.40.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.40.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.41.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.41.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.41.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.42.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.42.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.42.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.43.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.43.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.43.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.44.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.44.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.44.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.45.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.45.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.45.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.46.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.46.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.46.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.47.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.47.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.47.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.48.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.48.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.48.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.49.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.49.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.49.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.5.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.5.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.50.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.50.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.50.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.51.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.51.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.51.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.52.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.52.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.52.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.53.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.53.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.53.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.54.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.54.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.54.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.55.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.55.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.55.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.56.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.56.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.56.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.57.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.57.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.57.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.58.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.58.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.58.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.59.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.59.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.59.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.6.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.6.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.60.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.60.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.60.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.61.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.61.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.61.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.62.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.62.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.62.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.63.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.63.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.63.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.7.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.7.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.8.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.8.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.9.down_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.experts.9.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.gate.e_score_correction_bias": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.gate.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.kv_a_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.kv_a_proj_with_mqa.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.kv_b_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.q_a_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.q_a_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.40.self_attn.q_b_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.input_layernorm.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.0.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.0.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.1.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.1.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.10.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.10.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.11.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.11.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.12.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.12.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.13.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.13.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.14.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.14.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.15.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.15.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.16.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.16.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.16.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.17.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.17.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.17.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.18.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.18.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.18.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.19.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.19.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.19.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.2.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.2.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.20.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.20.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.20.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.21.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.21.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.21.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.22.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.22.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.22.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.23.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.23.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.23.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.24.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.24.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.24.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.25.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.25.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.25.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.26.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.26.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.26.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.27.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.27.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.27.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.28.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.28.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.28.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.29.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.29.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.29.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.3.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.3.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.30.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.30.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.30.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.31.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.31.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.31.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.32.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.32.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.32.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.33.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.33.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.33.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.34.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.34.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.34.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.35.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.35.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.35.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.36.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.36.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.36.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.37.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.37.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.37.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.38.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.38.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.38.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.39.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.39.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.39.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.4.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.4.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.40.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.40.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.40.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.41.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.41.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.41.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.42.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.42.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.42.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.43.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.43.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.43.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.44.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.44.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.44.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.45.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.45.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.45.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.46.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.46.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.46.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.47.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.47.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.47.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.48.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.48.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.48.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.49.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.49.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.49.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.5.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.5.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.50.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.50.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.50.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.51.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.51.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.51.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.52.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.52.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.52.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.53.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.53.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.53.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.54.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.54.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.54.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.55.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.55.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.55.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.56.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.56.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.56.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.57.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.57.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.57.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.58.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.58.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.58.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.59.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.59.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.59.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.6.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.6.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.60.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.60.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.60.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.61.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.61.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.61.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.62.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.62.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.62.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.63.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.63.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.63.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.7.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.7.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.7.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.8.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.8.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.9.down_proj.weight": "model-00041-of-00048.safetensors", + "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.experts.9.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.gate.e_score_correction_bias": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.gate.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.kv_a_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.kv_a_proj_with_mqa.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.kv_b_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.q_a_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.q_a_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.41.self_attn.q_b_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.input_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.0.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.0.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.1.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.1.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.10.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.10.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.11.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.11.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.11.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.12.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.12.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.13.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.13.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.14.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.14.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.15.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.15.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.16.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.16.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.16.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.17.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.17.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.17.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.18.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.18.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.18.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.19.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.19.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.19.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.2.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.2.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.20.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.20.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.20.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.21.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.21.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.21.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.22.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.22.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.22.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.23.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.23.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.23.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.24.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.24.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.24.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.25.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.25.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.25.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.26.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.26.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.26.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.27.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.27.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.27.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.28.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.28.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.28.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.29.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.29.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.29.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.3.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.3.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.30.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.30.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.30.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.31.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.31.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.31.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.32.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.32.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.32.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.33.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.33.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.33.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.34.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.34.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.34.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.35.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.35.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.35.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.36.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.36.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.36.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.37.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.37.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.37.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.38.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.38.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.38.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.39.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.39.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.39.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.4.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.4.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.40.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.40.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.40.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.41.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.41.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.41.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.42.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.42.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.42.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.43.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.43.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.43.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.44.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.44.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.44.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.45.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.45.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.45.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.46.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.46.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.46.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.47.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.47.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.47.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.48.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.48.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.48.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.49.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.49.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.49.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.5.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.5.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.50.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.50.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.50.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.51.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.51.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.51.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.52.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.52.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.52.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.53.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.53.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.53.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.54.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.54.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.54.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.55.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.55.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.55.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.56.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.56.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.56.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.57.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.57.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.57.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.58.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.58.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.58.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.59.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.59.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.59.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.6.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.6.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.60.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.60.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.60.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.61.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.61.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.61.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.62.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.62.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.62.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.63.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.63.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.63.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.7.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.7.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.8.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.8.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.9.down_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.experts.9.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.gate.e_score_correction_bias": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.gate.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.kv_a_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.kv_a_proj_with_mqa.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.kv_b_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.q_a_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.q_a_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.42.self_attn.q_b_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.input_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.0.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.0.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.1.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.1.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.10.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.10.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.11.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.11.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.12.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.12.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.13.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.13.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.14.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.14.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.15.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.15.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.16.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.16.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.16.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.17.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.17.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.17.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.18.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.18.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.18.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.19.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.19.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.19.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.2.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.2.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.20.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.20.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.20.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.21.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.21.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.21.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.22.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.22.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.22.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.23.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.23.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.23.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.24.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.24.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.24.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.25.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.25.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.25.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.26.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.26.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.26.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.27.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.27.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.27.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.28.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.28.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.28.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.29.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.29.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.29.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.3.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.3.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.30.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.30.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.30.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.31.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.31.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.31.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.32.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.32.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.32.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.33.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.33.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.33.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.34.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.34.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.34.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.35.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.35.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.35.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.36.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.36.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.36.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.37.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.37.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.37.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.38.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.38.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.38.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.39.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.39.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.39.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.4.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.4.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.40.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.40.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.40.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.41.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.41.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.41.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.42.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.42.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.42.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.43.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.43.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.43.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.44.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.44.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.44.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.45.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.45.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.45.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.46.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.46.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.46.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.47.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.47.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.47.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.48.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.48.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.48.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.49.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.49.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.49.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.5.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.5.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.50.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.50.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.50.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.51.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.51.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.51.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.52.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.52.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.52.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.53.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.53.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.53.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.54.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.54.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.54.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.55.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.55.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.55.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.56.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.56.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.56.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.57.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.57.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.57.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.58.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.58.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.58.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.59.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.59.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.59.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.6.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.6.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.60.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.60.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.60.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.61.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.61.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.61.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.62.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.62.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.62.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.63.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.63.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.63.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.7.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.7.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.7.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.8.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.8.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.9.down_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.experts.9.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.gate.e_score_correction_bias": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.gate.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.kv_a_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.kv_a_proj_with_mqa.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.kv_b_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.q_a_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.q_a_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.43.self_attn.q_b_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.input_layernorm.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.0.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.0.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.1.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.1.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.10.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.10.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.11.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.11.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.11.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.12.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.12.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.13.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.13.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.14.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.14.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.15.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.15.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.16.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.16.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.16.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.17.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.17.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.17.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.18.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.18.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.18.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.19.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.19.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.19.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.2.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.2.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.20.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.20.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.20.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.21.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.21.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.21.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.22.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.22.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.22.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.23.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.23.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.23.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.24.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.24.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.24.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.25.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.25.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.25.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.26.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.26.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.26.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.27.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.27.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.27.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.28.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.28.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.28.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.29.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.29.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.29.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.3.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.3.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.30.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.30.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.30.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.31.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.31.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.31.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.32.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.32.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.32.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.33.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.33.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.33.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.34.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.34.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.34.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.35.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.35.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.35.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.36.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.36.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.36.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.37.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.37.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.37.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.38.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.38.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.38.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.39.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.39.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.39.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.4.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.4.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.40.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.40.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.40.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.41.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.41.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.41.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.42.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.42.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.42.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.43.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.43.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.43.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.44.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.44.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.44.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.45.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.45.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.45.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.46.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.46.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.46.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.47.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.47.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.47.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.48.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.48.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.48.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.49.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.49.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.49.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.5.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.5.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.50.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.50.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.50.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.51.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.51.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.51.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.52.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.52.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.52.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.53.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.53.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.53.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.54.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.54.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.54.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.55.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.55.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.55.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.56.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.56.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.56.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.57.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.57.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.57.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.58.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.58.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.58.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.59.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.59.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.59.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.6.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.6.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.60.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.60.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.60.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.61.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.61.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.61.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.62.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.62.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.62.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.63.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.63.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.63.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.7.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.7.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.8.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.8.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.9.down_proj.weight": "model-00044-of-00048.safetensors", + "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.experts.9.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.gate.e_score_correction_bias": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.gate.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.kv_a_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.kv_a_proj_with_mqa.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.kv_b_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.q_a_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.q_a_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.44.self_attn.q_b_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.input_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.0.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.0.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.1.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.1.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.10.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.10.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.11.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.11.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.12.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.12.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.13.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.13.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.14.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.14.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.15.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.15.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.16.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.16.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.16.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.17.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.17.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.17.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.18.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.18.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.18.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.19.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.19.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.19.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.2.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.2.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.20.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.20.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.20.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.21.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.21.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.21.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.22.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.22.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.22.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.23.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.23.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.23.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.24.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.24.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.24.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.25.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.25.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.25.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.26.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.26.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.26.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.27.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.27.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.27.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.28.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.28.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.28.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.29.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.29.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.29.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.3.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.3.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.30.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.30.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.30.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.31.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.31.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.31.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.32.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.32.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.32.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.33.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.33.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.33.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.34.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.34.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.34.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.35.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.35.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.35.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.36.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.36.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.36.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.37.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.37.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.37.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.38.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.38.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.38.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.39.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.39.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.39.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.4.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.4.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.40.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.40.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.40.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.41.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.41.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.41.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.42.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.42.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.42.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.43.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.43.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.43.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.44.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.44.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.44.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.45.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.45.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.45.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.46.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.46.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.46.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.47.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.47.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.47.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.48.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.48.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.48.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.49.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.49.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.49.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.5.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.5.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.50.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.50.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.50.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.51.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.51.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.51.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.52.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.52.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.52.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.53.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.53.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.53.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.54.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.54.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.54.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.55.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.55.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.55.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.56.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.56.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.56.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.57.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.57.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.57.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.58.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.58.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.58.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.59.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.59.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.59.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.6.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.6.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.60.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.60.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.60.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.61.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.61.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.61.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.62.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.62.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.62.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.63.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.63.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.63.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.7.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.7.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.7.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.8.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.8.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.9.down_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.experts.9.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.gate.e_score_correction_bias": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.gate.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.kv_a_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.kv_a_proj_with_mqa.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.kv_b_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.q_a_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.q_a_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.45.self_attn.q_b_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.input_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.0.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.0.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.1.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.1.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.10.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.10.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.11.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.11.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.12.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.12.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.13.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.13.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.14.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.14.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.15.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.15.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.16.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.16.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.17.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.17.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.18.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.18.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.19.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.19.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.2.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.2.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.20.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.20.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.21.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.21.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.22.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.22.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.23.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.23.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.24.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.24.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.25.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.25.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.26.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.26.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.27.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.27.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.28.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.28.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.29.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.29.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.3.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.3.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.30.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.30.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.31.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.31.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.32.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.32.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.33.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.33.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.33.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.34.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.34.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.35.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.35.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.36.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.36.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.37.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.37.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.38.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.38.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.39.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.39.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.4.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.4.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.40.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.40.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.41.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.41.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.42.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.42.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.43.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.43.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.44.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.44.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.45.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.45.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.46.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.46.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.47.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.47.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.48.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.48.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.49.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.49.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.5.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.5.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.50.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.50.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.51.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.51.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.52.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.52.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.53.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.53.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.54.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.54.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.55.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.55.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.55.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.56.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.56.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.57.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.57.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.58.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.58.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.59.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.59.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.6.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.6.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.60.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.60.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.61.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.61.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.62.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.62.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.63.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.63.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.7.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.7.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.8.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.8.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.9.down_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.experts.9.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.gate.e_score_correction_bias": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.gate.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.kv_a_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.kv_a_proj_with_mqa.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.kv_b_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.q_a_layernorm.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.q_a_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.46.self_attn.q_b_proj.weight": "model-00047-of-00048.safetensors", + "model.layers.5.input_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.11.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.19.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.26.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.34.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.4.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.4.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.41.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.49.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.56.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.gate.e_score_correction_bias": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.gate.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.kv_a_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.kv_a_proj_with_mqa.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.kv_b_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.q_a_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.q_a_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.5.self_attn.q_b_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.input_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.1.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.19.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.23.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.34.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.38.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.4.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.49.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.53.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.8.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00006-of-00048.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.gate.e_score_correction_bias": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.gate.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.kv_a_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.kv_a_proj_with_mqa.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.kv_b_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.q_a_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.q_a_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.6.self_attn.q_b_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.13.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.28.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.43.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.58.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.gate.e_score_correction_bias": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.gate.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.kv_a_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.kv_a_proj_with_mqa.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.kv_b_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.q_a_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.q_a_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.7.self_attn.q_b_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.input_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.10.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.20.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.25.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.35.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.40.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.5.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.50.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.55.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.gate.e_score_correction_bias": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.gate.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.kv_a_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.kv_a_proj_with_mqa.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.kv_b_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.q_a_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.q_a_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.8.self_attn.q_b_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.input_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.20.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.35.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.5.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.50.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.gate.e_score_correction_bias": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.gate.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.kv_a_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.kv_a_proj_with_mqa.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.kv_b_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.q_a_layernorm.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.q_a_proj.weight": "model-00010-of-00048.safetensors", + "model.layers.9.self_attn.q_b_proj.weight": "model-00010-of-00048.safetensors", + "model.norm.weight": "model-00047-of-00048.safetensors", + "model.layers.47.mlp.experts.47.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.32.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.14.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.30.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.29.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.47.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.32.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.2.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.25.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.32.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.26.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.63.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.38.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.19.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.42.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.37.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.41.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.18.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.20.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.11.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.52.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.53.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.60.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.58.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.q_a_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.45.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.36.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.31.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.20.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.9.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.50.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.56.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.input_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.3.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.kv_a_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.42.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.43.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.0.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.50.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.23.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.55.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.39.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.35.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.q_a_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.40.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.45.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.55.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.17.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.34.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.5.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.37.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.12.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.53.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.7.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.55.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.36.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.57.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.8.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.33.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.25.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.47.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.14.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.48.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.59.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.27.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.24.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.10.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.54.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.38.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.56.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.30.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.11.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.8.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.44.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.31.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.48.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.60.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.6.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.37.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.4.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.54.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.35.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.61.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.28.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.22.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.49.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.24.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.49.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.q_b_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.18.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.22.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.22.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.39.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.eh_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.61.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.62.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.56.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.48.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.35.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.51.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.40.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.12.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.6.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.31.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.9.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.26.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.63.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.59.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.46.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.52.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.enorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.33.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.27.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.26.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.51.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.19.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.19.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.30.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.21.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.43.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.23.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.45.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.59.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.38.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.21.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.13.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.44.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.gate.e_score_correction_bias": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.58.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.7.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.25.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.16.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.15.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.58.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.29.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.18.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.41.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.1.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.kv_a_proj_with_mqa.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.34.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.gate.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.1.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.49.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.hnorm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.40.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.43.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.46.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.57.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.5.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.16.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.62.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.27.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.28.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.39.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.15.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.46.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.51.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.3.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.21.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.52.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.kv_b_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.60.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.53.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.17.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.44.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.29.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.4.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.63.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.33.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.42.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.61.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.10.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.16.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.20.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.62.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.28.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.17.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.41.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.0.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.23.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.24.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.57.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.36.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.shared_head.norm.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.13.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.54.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.34.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.50.up_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.7.gate_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.mlp.experts.2.down_proj.weight": "model-00048-of-00048.safetensors", + "model.layers.47.shared_head.head.weight": "model-00048-of-00048.safetensors", + "model.layers.47.embed_tokens.weight": "model-00048-of-00048.safetensors" + } +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..aba40197a4cdb5607f4ab7a05fb0a4ee8054fd6d --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e773648cb4e65de8660ea6365e10acca112d42a854923df93db4a6f333a82d +size 20217442 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f16d05669e5b0a0b73314874fc1000895d3432ab --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "backend": "tokenizers", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "is_local": true, + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "TokenizersBackend" +}